diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PTX')
41 files changed, 6530 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp new file mode 100644 index 0000000..1830213 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -0,0 +1,249 @@ +//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a PTX MCInst to a .ptx file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "PTXInstPrinter.h" +#include "MCTargetDesc/PTXBaseInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#include "PTXGenAsmWriter.inc" + +PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI, MII, MRI) { + // Initialize the set of available features. 
+ setAvailableFeatures(STI.getFeatureBits()); +} + +void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + // Decode the register number into type and offset + unsigned RegSpace = RegNo & 0x7; + unsigned RegType = (RegNo >> 3) & 0x7; + unsigned RegOffset = RegNo >> 6; + + // Print the register + OS << "%"; + + switch (RegSpace) { + default: + llvm_unreachable("Unknown register space!"); + case PTXRegisterSpace::Reg: + switch (RegType) { + default: + llvm_unreachable("Unknown register type!"); + case PTXRegisterType::Pred: + OS << "p"; + break; + case PTXRegisterType::B16: + OS << "rh"; + break; + case PTXRegisterType::B32: + OS << "r"; + break; + case PTXRegisterType::B64: + OS << "rd"; + break; + case PTXRegisterType::F32: + OS << "f"; + break; + case PTXRegisterType::F64: + OS << "fd"; + break; + } + break; + case PTXRegisterSpace::Return: + OS << "ret"; + break; + case PTXRegisterSpace::Argument: + OS << "arg"; + break; + } + + OS << RegOffset; +} + +void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printPredicate(MI, O); + switch (MI->getOpcode()) { + default: + printInstruction(MI, O); + break; + case PTX::CALL: + printCall(MI, O); + } + O << ";"; + printAnnotation(O, Annot); +} + +void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { + // The last two operands are the predicate operands + int RegIndex; + int OpIndex; + + if (MI->getOpcode() == PTX::CALL) { + RegIndex = 0; + OpIndex = 1; + } else { + RegIndex = MI->getNumOperands()-2; + OpIndex = MI->getNumOperands()-1; + } + + int PredOp = MI->getOperand(OpIndex).getImm(); + if (PredOp == PTXPredicate::None) + return; + + if (PredOp == PTXPredicate::Negate) + O << '!'; + else + O << '@'; + + printOperand(MI, RegIndex, O); +} + +void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { + O << "\tcall.uni\t"; + // The first two operands are the predicate slot + unsigned Index = 2; + unsigned NumRets = 
MI->getOperand(Index++).getImm(); + + if (NumRets > 0) { + O << "("; + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumRets; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + O << "), "; + } + + const MCExpr* Expr = MI->getOperand(Index++).getExpr(); + unsigned NumArgs = MI->getOperand(Index++).getImm(); + + // if the function call is to printf or puts, change to vprintf + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + if (Sym.getName() == "printf" || Sym.getName() == "puts") { + O << "vprintf"; + } else { + O << Sym.getName(); + } + } else { + O << *Expr; + } + + O << ", ("; + + if (NumArgs > 0) { + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumArgs; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + } + O << ")"; +} + +void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + double Imm = Op.getFPImm(); + APFloat FPImm(Imm); + APInt FPIntImm = FPImm.bitcastToAPInt(); + O << "0D"; + // PTX requires us to output the full 64 bits, even if the number is zero + if (FPIntImm.getZExtValue() > 0) { + O << FPIntImm.toString(16, false); + } else { + O << "0000000000000000"; + } + } else if (Op.isReg()) { + printRegName(O, Op.getReg()); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + const MCExpr *Expr = Op.getExpr(); + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + O << Sym.getName(); + } else { + O << *Op.getExpr(); + } + } +} + +void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // By definition, operand OpNo+1 is an i32imm + const MCOperand &Op2 = MI->getOperand(OpNo+1); + printOperand(MI, OpNo, O); + if (Op2.getImm() == 0) + return; // don't print "+0" + O << 
"+" << Op2.getImm(); +} + +void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert (Op.isImm() && "Rounding modes must be immediate values"); + switch (Op.getImm()) { + default: + llvm_unreachable("Unknown rounding mode!"); + case PTXRoundingMode::RndDefault: + llvm_unreachable("FP rounding-mode pass did not handle instruction!"); + case PTXRoundingMode::RndNone: + // Do not print anything. + break; + case PTXRoundingMode::RndNearestEven: + O << ".rn"; + break; + case PTXRoundingMode::RndTowardsZero: + O << ".rz"; + break; + case PTXRoundingMode::RndNegInf: + O << ".rm"; + break; + case PTXRoundingMode::RndPosInf: + O << ".rp"; + break; + case PTXRoundingMode::RndApprox: + O << ".approx"; + break; + case PTXRoundingMode::RndNearestEvenInt: + O << ".rni"; + break; + case PTXRoundingMode::RndTowardsZeroInt: + O << ".rzi"; + break; + case PTXRoundingMode::RndNegInfInt: + O << ".rmi"; + break; + case PTXRoundingMode::RndPosInfInt: + O << ".rpi"; + break; + } +} + diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h new file mode 100644 index 0000000..ea4d504 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -0,0 +1,45 @@ +//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints n PTX MCInst to a .ptx file. 
//
//===----------------------------------------------------------------------===//

#ifndef PTXINSTPRINTER_H
#define PTXINSTPRINTER_H

#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"

namespace llvm {

class MCOperand;

/// PTXInstPrinter - MCInstPrinter implementation that writes PTX MCInsts as
/// textual PTX assembly.
class PTXInstPrinter : public MCInstPrinter {
public:
  /// The subtarget info is only used to initialize the set of available
  /// features; the other three arguments are forwarded to MCInstPrinter.
  PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                 const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);

  /// Print a whole instruction: guard predicate (if any), the instruction
  /// body, a terminating ';' and the annotation \p Annot.
  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
  /// Print the name of the encoded register \p RegNo to \p OS.
  virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;

  // Autogenerated by tblgen.
  void printInstruction(const MCInst *MI, raw_ostream &O);
  static const char *getRegisterName(unsigned RegNo);

  /// Print the guard predicate prefix of \p MI, if it has one.
  void printPredicate(const MCInst *MI, raw_ostream &O);
  /// Print a call instruction (return values, callee, argument list).
  void printCall(const MCInst *MI, raw_ostream &O);
  /// Print operand \p OpNo (immediate, FP immediate, register or expression).
  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  /// Print a memory operand: operand \p OpNo plus the immediate offset held
  /// in operand OpNo+1.
  void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
  /// Print the rounding-mode suffix encoded in immediate operand \p OpNo.
  void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
}

#endif

diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..a3e0f32
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,134 @@
//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains small standalone helper functions and enum definitions for
// the PTX target useful for the compiler back-end and the MC libraries.
// As such, it deliberately does not include references to LLVM core
// code gen types, passes, etc..
//
//===----------------------------------------------------------------------===//

#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H

#include "PTXMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
  /// State-space identifiers; the values are used as LLVM address-space
  /// numbers when naming a variable's state space.
  namespace PTXStateSpace {
    enum {
      Global = 0, // default to global state space
      Constant = 1,
      Local = 2,
      Parameter = 3,
      Shared = 4
    };
  } // namespace PTXStateSpace

  /// Value of the predicate-flag operand carried by every instruction.
  namespace PTXPredicate {
    enum {
      Normal = 0,   // guarded by the predicate register
      Negate = 1,   // guarded by the negated predicate register
      None = 2      // not predicated
    };
  } // namespace PTXPredicate

  /// Namespace to hold all target-specific flags.
  namespace PTXRoundingMode {
    // Instruction Flags
    enum {
      // Rounding Mode Flags
      RndMask = 15,
      RndDefault = 0, // ---
      RndNone = 1, // <NONE>
      RndNearestEven = 2, // .rn
      RndTowardsZero = 3, // .rz
      RndNegInf = 4, // .rm
      RndPosInf = 5, // .rp
      RndApprox = 6, // .approx
      RndNearestEvenInt = 7, // .rni
      RndTowardsZeroInt = 8, // .rzi
      RndNegInfInt = 9, // .rmi
      RndPosInfInt = 10 // .rpi
    };
  } // namespace PTXRoundingMode

  namespace PTXRegisterType {
    // Register type encoded in MCOperands
    enum {
      Pred = 0,
      B16,
      B32,
      B64,
      F32,
      F64
    };
  } // namespace PTXRegisterType

  namespace PTXRegisterSpace {
    // Register space encoded in MCOperands
    enum {
      Reg = 0,
      Local,
      Param,
      Argument,
      Return
    };
  } // namespace PTXRegisterSpace

  /// decodeRegisterName - Print the PTX name of an encoded register.
  ///
  /// Layout of \p EncodedReg: bits 0-2 hold the PTXRegisterSpace, bits 3-5
  /// the PTXRegisterType and the remaining high bits the register offset.
  /// The output is '%', a prefix selected by space/type, then the offset.
  /// Only the Reg, Return and Argument spaces are handled; any other space
  /// (or an unknown type within Reg) hits llvm_unreachable.
  inline static void decodeRegisterName(raw_ostream &OS,
                                        unsigned EncodedReg) {
    OS << "%";

    unsigned RegSpace = EncodedReg & 0x7;
    unsigned RegType = (EncodedReg >> 3) & 0x7;
    unsigned RegOffset = EncodedReg >> 6;

    switch (RegSpace) {
    default:
      llvm_unreachable("Unknown register space!");
    case PTXRegisterSpace::Reg:
      switch (RegType) {
      default:
        llvm_unreachable("Unknown register type!");
      case PTXRegisterType::Pred:
        OS << "p";
        break;
      case PTXRegisterType::B16:
        OS << "rh";
        break;
      case PTXRegisterType::B32:
        OS << "r";
        break;
      case PTXRegisterType::B64:
        OS << "rd";
        break;
      case PTXRegisterType::F32:
        OS << "f";
        break;
      case PTXRegisterType::F64:
        OS << "fd";
        break;
      }
      break;
    case PTXRegisterSpace::Return:
      OS << "ret";
      break;
    case PTXRegisterSpace::Argument:
      OS << "arg";
      break;
    }

    OS << RegOffset;
  }
} // namespace llvm

#endif

diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
new file mode 100644
index 0000000..cdfbc80
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -0,0 +1,37 @@
//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the PTXMCAsmInfo properties.
//
//===----------------------------------------------------------------------===//

#include "PTXMCAsmInfo.h"
#include "llvm/ADT/Triple.h"

using namespace llvm;

// Out-of-line definition of the class's one virtual method.
void PTXMCAsmInfo::anchor() { }

/// Set the assembler properties for PTX output. \p T is unused; \p TT is the
/// target triple string and is only inspected to choose the pointer size.
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
  Triple TheTriple(TT);
  // Only ptx64 overrides PointerSize; every other architecture keeps the
  // value inherited from the MCAsmInfo base class.
  if (TheTriple.getArch() == Triple::ptx64)
    PointerSize = 8;

  CommentString = "//";

  PrivateGlobalPrefix = "$L__";

  AllowPeriodsInName = false;

  HasSetDirective = false;

  HasDotTypeDotSizeDirective = false;

  HasSingleParameterDotFile = false;
}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
new file mode 100644
index 0000000..32ca069
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -0,0 +1,30 @@
//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the PTXMCAsmInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef PTX_MCASM_INFO_H
#define PTX_MCASM_INFO_H

#include "llvm/MC/MCAsmInfo.h"

namespace llvm {
  class Target;
  class StringRef;

  /// PTXMCAsmInfo - Assembler properties (comment string, private-symbol
  /// prefix, supported directives, pointer size) used when emitting PTX.
  class PTXMCAsmInfo : public MCAsmInfo {
    virtual void anchor();
  public:
    /// \p TT is the target triple string; see the constructor definition
    /// for the properties that are set.
    explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
  };
} // namespace llvm

#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 0000000..08fb970
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,98 @@
//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides PTX specific target descriptions.
+// +//===----------------------------------------------------------------------===// + +#include "PTXMCTargetDesc.h" +#include "PTXMCAsmInfo.h" +#include "InstPrinter/PTXInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PTXGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPTXMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + // PTX does not have a return address register. + InitPTXMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstPrinter *createPTXMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + assert(SyntaxVariant == 0 && "We only have one syntax variant"); + return new PTXInstPrinter(MAI, MII, MRI, STI); +} + +extern "C" void LLVMInitializePTXTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); + + // Register the MC codegen info. 
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, + createPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, + createPTXMCSubtargetInfo); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); +} diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h new file mode 100644 index 0000000..542638a --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -0,0 +1,36 @@ +//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXMCTARGETDESC_H +#define PTXMCTARGETDESC_H + +namespace llvm { +class Target; + +extern Target ThePTX32Target; +extern Target ThePTX64Target; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. 
#define GET_REGINFO_ENUM
#include "PTXGenRegisterInfo.inc"

// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
#include "PTXGenInstrInfo.inc"

// Pulls in the enum section of the generated subtarget info.
#define GET_SUBTARGETINFO_ENUM
#include "PTXGenSubtargetInfo.inc"

#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 0000000..ffb92cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,43 @@
//===-- PTX.h - Top-level interface for the PTX back-end --------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// PTX back-end.
//
//===----------------------------------------------------------------------===//

#ifndef PTX_H
#define PTX_H

#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/Target/TargetMachine.h"

namespace llvm {
  class MachineInstr;
  class MCInst;
  class PTXAsmPrinter;
  class PTXTargetMachine;
  class FunctionPass;

  // Pass factories. The names indicate instruction selection, machine-function
  // info extraction, FP rounding-mode assignment and register allocation;
  // the implementations live in other files of the back-end.
  FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
                                 CodeGenOpt::Level OptLevel);

  FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel);

  FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
                                            CodeGenOpt::Level OptLevel);

  FunctionPass *createPTXRegisterAllocator();

  // Lowers a MachineInstr into the MCInst \p OutMI; used by the asm printer
  // before handing the instruction to the streamer.
  void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                    PTXAsmPrinter &AP);

} // namespace llvm

#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 0000000..994a68e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,141 @@
//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the PTX target.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Target-independent interfaces
//===----------------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// Subtarget Features
//===----------------------------------------------------------------------===//

//===- Architectural Features ---------------------------------------------===//

def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
                                     "Do not demote .f64 to .f32">;

def FeatureNoFMA  : SubtargetFeature<"no-fma","SupportsFMA", "false",
                                     "Disable Fused-Multiply Add">;

//===- PTX Version --------------------------------------------------------===//

def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
                                    "Use PTX Language Version 2.0">;

def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
                                    "Use PTX Language Version 2.1">;

def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
                                    "Use PTX Language Version 2.2">;

def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
                                    "Use PTX Language Version 2.3">;

//===- PTX Target ---------------------------------------------------------===//

// Shader models. Every 2.x model implies FeatureDouble; the 1.x models do not.
def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
                                   "Use Shader Model 1.0">;
def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
                                   "Use Shader Model 1.1">;
def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
                                   "Use Shader Model 1.2">;
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
                                   "Use Shader Model 1.3">;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
                                   "Use Shader Model 2.0", [FeatureDouble]>;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
                                   "Use Shader Model 2.1", [FeatureDouble]>;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
                                   "Use Shader Model 2.2", [FeatureDouble]>;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
                                   "Use Shader Model 2.3", [FeatureDouble]>;

// Compute-compatibility levels. Only compute20 implies FeatureDouble.
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
                                        "PTX_COMPUTE_1_0",
                                        "Use Compute Compatibility 1.0">;
def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
                                        "PTX_COMPUTE_1_1",
                                        "Use Compute Compatibility 1.1">;
def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
                                        "PTX_COMPUTE_1_2",
                                        "Use Compute Compatibility 1.2">;
def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
                                        "PTX_COMPUTE_1_3",
                                        "Use Compute Compatibility 1.3">;
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
                                        "PTX_COMPUTE_2_0",
                                        "Use Compute Compatibility 2.0",
                                        [FeatureDouble]>;

//===----------------------------------------------------------------------===//
// PTX supported processors
//===----------------------------------------------------------------------===//

// Shorthand for a processor with no itineraries.
class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;

def : Proc<"generic", []>;

// Processor definitions for compute/shader models
def : Proc<"compute_10", [FeatureCOMPUTE10]>;
def : Proc<"compute_11", [FeatureCOMPUTE11]>;
def : Proc<"compute_12", [FeatureCOMPUTE12]>;
def : Proc<"compute_13", [FeatureCOMPUTE13]>;
def : Proc<"compute_20", [FeatureCOMPUTE20]>;
def : Proc<"sm_10", [FeatureSM10]>;
def : Proc<"sm_11", [FeatureSM11]>;
def : Proc<"sm_12", [FeatureSM12]>;
def : Proc<"sm_13", [FeatureSM13]>;
def : Proc<"sm_20", [FeatureSM20]>;
def : Proc<"sm_21", [FeatureSM21]>;
def : Proc<"sm_22", [FeatureSM22]>;
def : Proc<"sm_23", [FeatureSM23]>;

// Processor definitions for common GPU architectures
// (FeatureSM20 already implies FeatureDouble, so listing it again for
// gf100/fermi is redundant but harmless.)
def : Proc<"g80", [FeatureSM10]>;
def : Proc<"gt200", [FeatureSM13]>;
def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "PTXRegisterInfo.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "PTXInstrInfo.td"

def PTXInstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// Assembly printer
//===----------------------------------------------------------------------===//
// PTX uses the MC printer for asm output, so make sure the TableGen
// AsmWriter bits get associated with the correct class.
def PTXAsmWriter : AsmWriter {
  string AsmWriterClassName  = "InstPrinter";
  bit isMCAsmWriter = 1;
}

//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//

def PTX : Target {
  let InstructionSet = PTXInstrInfo;
  let AssemblyWriters = [PTXAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 0000000..0b6ac7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,561 @@
//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PTX assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-asm-printer" + +#include "PTXAsmPrinter.h" +#include "PTX.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" +#include "PTXTargetMachine.h" +#include "llvm/Argument.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static const char PARAM_PREFIX[] = "__param_"; +static const char RETURN_PREFIX[] = "__ret_"; + +static const char *getRegisterTypeName(unsigned RegType) { + switch (RegType) { + default: + llvm_unreachable("Unknown register type"); + case PTXRegisterType::Pred: + return ".pred"; + case PTXRegisterType::B16: + return ".b16"; + case PTXRegisterType::B32: + return ".b32"; + case PTXRegisterType::B64: + return ".b64"; + case PTXRegisterType::F32: + return ".f32"; + case PTXRegisterType::F64: + return ".f64"; + } +} + +static const char 
*getStateSpaceName(unsigned addressSpace) { + switch (addressSpace) { + default: llvm_unreachable("Unknown state space"); + case PTXStateSpace::Global: return "global"; + case PTXStateSpace::Constant: return "const"; + case PTXStateSpace::Local: return "local"; + case PTXStateSpace::Parameter: return "param"; + case PTXStateSpace::Shared: return "shared"; + } +} + +static const char *getTypeName(Type* type) { + while (true) { + switch (type->getTypeID()) { + default: llvm_unreachable("Unknown type"); + case Type::FloatTyID: return ".f32"; + case Type::DoubleTyID: return ".f64"; + case Type::IntegerTyID: + switch (type->getPrimitiveSizeInBits()) { + default: llvm_unreachable("Unknown integer bit-width"); + case 16: return ".u16"; + case 32: return ".u32"; + case 64: return ".u64"; + } + case Type::ArrayTyID: + case Type::PointerTyID: + type = dyn_cast<SequentialType>(type)->getElementType(); + break; + } + } + return NULL; +} + +bool PTXAsmPrinter::doFinalization(Module &M) { + // XXX Temproarily remove global variables so that doFinalization() will not + // emit them again (global variables are emitted at beginning). 
+ + Module::GlobalListType &global_list = M.getGlobalList(); + int i, n = global_list.size(); + GlobalVariable **gv_array = new GlobalVariable* [n]; + + // first, back-up GlobalVariable in gv_array + i = 0; + for (Module::global_iterator I = global_list.begin(), E = global_list.end(); + I != E; ++I) + gv_array[i++] = &*I; + + // second, empty global_list + while (!global_list.empty()) + global_list.remove(global_list.begin()); + + // call doFinalization + bool ret = AsmPrinter::doFinalization(M); + + // now we restore global variables + for (i = 0; i < n; i ++) + global_list.insert(global_list.end(), gv_array[i]); + + delete[] gv_array; + return ret; +} + +void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) +{ + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + // Emit the PTX .version and .target attributes + OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString()); + OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() + + (ST.supportsDouble() ? "" + : ", map_f64_to_f32")); + // .address_size directive is optional, but it must immediately follow + // the .target directive if present within a module + if (ST.supportsPTX23()) { + const char *addrSize = ST.is64Bit() ? 
"64" : "32"; + OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize); + } + + OutStreamer.AddBlankLine(); + + // Define any .file directives + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); + GetOrCreateSourceID(FN, Dir); + } + + OutStreamer.AddBlankLine(); + + // declare external functions + for (Module::const_iterator i = M.begin(), e = M.end(); + i != e; ++i) + EmitFunctionDeclaration(i); + + // declare global variables + for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); + i != e; ++i) + EmitVariableDeclaration(i); +} + +void PTXAsmPrinter::EmitFunctionBodyStart() { + OutStreamer.EmitRawText(Twine("{")); + + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); + + // Print register definitions + SmallString<128> regDefs; + raw_svector_ostream os(regDefs); + unsigned numRegs; + + // pred + numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .pred %p<" << numRegs << ">;\n"; + + // i16 + numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; + + // i32 + numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .b32 %r<" << numRegs << ">;\n"; + + // i64 + numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; + + // f32 + numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .f32 %f<" << numRegs << ">;\n"; + + // f64 + numRegs = MFI->countRegisters(PTXRegisterType::F64, 
PTXRegisterSpace::Reg); + if(numRegs > 0) + os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; + + // Local params + for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); + i != e; ++i) + os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i) + << ";\n"; + + OutStreamer.EmitRawText(os.str()); + + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); + DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() + << " frame object(s)\n"); + for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { + DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); + if (FrameInfo->getObjectSize(i) > 0) { + OutStreamer.EmitRawText("\t.local .align " + + Twine(FrameInfo->getObjectAlignment(i)) + + " .b8 __local" + + Twine(i) + + "[" + + Twine(FrameInfo->getObjectSize(i)) + + "];"); + } + } + + //unsigned Index = 1; + // Print parameter passing params + //for (PTXMachineFunctionInfo::param_iterator + // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { + // std::string def = "\t.param .b"; + // def += utostr(*i); + // def += " __ret_"; + // def += utostr(Index); + // Index++; + // def += ";"; + // OutStreamer.EmitRawText(Twine(def)); + //} +} + +void PTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(Twine("}")); +} + +void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst TmpInst; + LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); +} + +void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { + // Check to see if this is a special global used by LLVM, if so, emit it. 
+ if (EmitSpecialLLVMGlobal(gv)) + return; + + MCSymbol *gvsym = Mang->getSymbol(gv); + + assert(gvsym->isUndefined() && "Cannot define a symbol twice!"); + + SmallString<128> decl; + raw_svector_ostream os(decl); + + // check if it is defined in some other translation unit + if (gv->isDeclaration()) + os << ".extern "; + + // state space: e.g., .global + os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' '; + + // alignment (optional) + unsigned alignment = gv->getAlignment(); + if (alignment != 0) + os << ".align " << gv->getAlignment() << ' '; + + + if (PointerType::classof(gv->getType())) { + PointerType* pointerTy = dyn_cast<PointerType>(gv->getType()); + Type* elementTy = pointerTy->getElementType(); + + if (elementTy->isArrayTy()) { + assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); + + ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy); + elementTy = arrayTy->getElementType(); + + unsigned numElements = arrayTy->getNumElements(); + + while (elementTy->isArrayTy()) { + arrayTy = dyn_cast<ArrayType>(elementTy); + elementTy = arrayTy->getElementType(); + + numElements *= arrayTy->getNumElements(); + } + + // FIXME: isPrimitiveType() == false for i16? 
+ assert(elementTy->isSingleValueType() && + "Non-primitive types are not handled"); + + // Find the size of the element in bits + unsigned elementSize = elementTy->getPrimitiveSizeInBits(); + + os << ".b" << elementSize << ' ' << gvsym->getName() + << '[' << numElements << ']'; + } else { + os << ".b8" << gvsym->getName() << "[]"; + } + + // handle string constants (assume ConstantArray means string) + if (gv->hasInitializer()) { + const Constant *C = gv->getInitializer(); + if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { + os << " = {"; + + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + if (i > 0) + os << ','; + + os << "0x"; + os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + } + + os << '}'; + } + } + } else { + // Note: this is currently the fall-through case and most likely generates + // incorrect code. + os << getTypeName(gv->getType()) << ' ' << gvsym->getName(); + + if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType())) + os << "[]"; + } + + os << ';'; + + OutStreamer.EmitRawText(os.str()); + OutStreamer.AddBlankLine(); +} + +void PTXAsmPrinter::EmitFunctionEntryLabel() { + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (!CurrentFnSym->isUndefined()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); + + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); + const bool isKernel = MFI->isKernel(); + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + SmallString<128> decl; + raw_svector_ostream os(decl); + os << (isKernel ? 
".entry" : ".func"); + + if (!isKernel) { + os << " ("; + if (ST.useParamSpaceForDeviceArgs()) { + for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), + b = i; i != e; ++i) { + if (i != b) + os << ", "; + + os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; + i != e; ++i) { + if (i != b) + os << ", "; + + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' + << MFI->getRegisterName(*i); + } + } + os << ')'; + } + + // Print function name + os << ' ' << CurrentFnSym->getName() << " ("; + + const Function *F = MF->getFunction(); + + // Print parameters + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), + b = i; i != e; ++i) { + if (i != b) + os << ", "; + + os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); + }*/ + int Counter = 1; + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), + b = i; i != e; ++i) { + if (i != b) + os << ", "; + const Type *ArgType = (*i).getType(); + os << ".param .b"; + if (ArgType->isPointerTy()) { + if (ST.is64Bit()) + os << "64"; + else + os << "32"; + } else { + os << ArgType->getPrimitiveSizeInBits(); + } + if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { + const PointerType *PtrType = dyn_cast<const PointerType>(ArgType); + os << " .ptr"; + switch (PtrType->getAddressSpace()) { + default: + llvm_unreachable("Unknown address space in argument"); + case PTXStateSpace::Global: + os << " .global"; + break; + case PTXStateSpace::Shared: + os << " .shared"; + break; + } + } + os << " __param_" << Counter++; + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; + i != e; ++i) { + if (i != b) + os << ", "; + + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' + 
<< MFI->getRegisterName(*i); + } + } + os << ')'; + + OutStreamer.EmitRawText(os.str()); +} + +void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func) +{ + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + std::string decl = ""; + + // hard-coded emission of extern vprintf function + + if (func->getName() == "printf" || func->getName() == "puts") { + decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_2, .param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_3)\n"; + } + + OutStreamer.EmitRawText(Twine(decl)); +} + +unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } + + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); + + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); + + // Print out a .file directive to specify files for .loc directives. 
+ OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey()); + + return SrcId; +} + +MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, + const MCSymbol *Symbol) { + const MCExpr *Expr; + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + return MCOperand::CreateExpr(Expr); +} + +MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { + MCOperand MCOp; + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + unsigned EncodedReg; + switch (MO.getType()) { + default: + llvm_unreachable("Unknown operand type"); + case MachineOperand::MO_Register: + if (MO.getReg() > 0) { + // Encode the register + EncodedReg = MFI->getEncodedRegister(MO.getReg()); + } else { + EncodedReg = 0; + } + MCOp = MCOperand::CreateReg(EncodedReg); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_FPImmediate: + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } + + return MCOp; +} + +// Force static initialization. 
/// PTXAsmPrinter - AsmPrinter subclass that writes a module out as PTX text
/// through the MCStreamer handed to the constructor.
class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
public:
  explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer) {}

  const char *getPassName() const { return "PTX Assembly Printer"; }

  /// Runs AsmPrinter::doFinalization while the module's global variable list
  /// is temporarily emptied, then puts the globals back.
  bool doFinalization(Module &M);

  /// Emits the .version/.target/.address_size preamble, the .file
  /// directives, and the function and global variable declarations.
  virtual void EmitStartOfAsmFile(Module &M);
  /// Emits the opening "{" followed by the function's register, local
  /// parameter and .local frame-object declarations.
  virtual void EmitFunctionBodyStart();
  /// Emits the closing "}".
  virtual void EmitFunctionBodyEnd();
  /// Emits the ".entry"/".func" header with return values and parameters;
  /// reports a fatal error if the function symbol is already defined.
  virtual void EmitFunctionEntryLabel();
  /// Lowers \p MI to an MCInst and hands it to the streamer.
  virtual void EmitInstruction(const MachineInstr *MI);

  /// Returns the source ID for the given file, assigning a new ID and
  /// emitting a .file directive the first time a file is seen. An empty
  /// \p FileName is treated as "<stdin>".
  unsigned GetOrCreateSourceID(StringRef FileName,
                               StringRef DirName);

  /// Wraps \p Symbol in a symbol-reference expression operand.
  MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
  /// Converts a MachineOperand into the corresponding MCOperand.
  MCOperand lowerOperand(const MachineOperand &MO);

private:
  /// Emits the PTX declaration for one global variable.
  void EmitVariableDeclaration(const GlobalVariable *gv);
  /// Emits the extern vprintf prototype for functions named printf or puts.
  void EmitFunctionDeclaration(const Function* func);

  /// Maps a source file name to the source ID assigned to it.
  StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
+ /// + class PTXFPRoundingModePass : public MachineFunctionPass { + private: + static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; + + public: + PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID), + TargetMachine(TM) { + initializeMap(); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX FP Rounding Mode Pass"; + } + + private: + + void initializeMap(); + void processInstruction(MachineInstr &MI); + }; // class PTXFPRoundingModePass +} // end anonymous namespace + +using namespace llvm; + +char PTXFPRoundingModePass::ID = 0; + +bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { + // Look at each basic block + for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; + ++bbi) { + MachineBasicBlock &MBB = *bbi; + // Look at each instruction + for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); + ii != ie; ++ii) { + MachineInstr &MI = *ii; + processInstruction(MI); + } + } + return false; +} + +void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; + const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. 
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? 
RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + 
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? 
+ if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); + } + } +} + +FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXFPRoundingModePass(TM, OptLevel); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp new file mode 100644 index 0000000..e6e268e --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp @@ -0,0 +1,24 @@ +//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PTXFrameLowering.h" +#include "llvm/CodeGen/MachineFunction.h" + +using namespace llvm; + +void PTXFrameLowering::emitPrologue(MachineFunction &MF) const { +} + +void PTXFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h new file mode 100644 index 0000000..831e818 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h @@ -0,0 +1,44 @@ +//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_FRAMEINFO_H +#define PTX_FRAMEINFO_H + +#include "PTX.h" +#include "PTXSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class PTXSubtarget; + +class PTXFrameLowering : public TargetFrameLowering { +protected: + const PTXSubtarget &STI; + +public: + explicit PTXFrameLowering(const PTXSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const { return false; } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp new file mode 100644 index 0000000..5c7ee29 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -0,0 +1,356 @@ +//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the PTX target. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// PTXDAGToDAGISel - PTX specific code to select PTX machine +// instructions for SelectionDAG operations. +class PTXDAGToDAGISel : public SelectionDAGISel { + public: + PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + + virtual const char *getPassName() const { + return "PTX DAG->DAG Pattern Instruction Selection"; + } + + SDNode *Select(SDNode *Node); + + // Complex Pattern Selectors. + bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); + + // Include the pieces auto'gened from the target description +#include "PTXGenDAGISel.inc" + + private: + // We need this only because we can't match intruction BRAdp + // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td + SDNode *SelectBRCOND(SDNode *Node); + + SDNode *SelectREADPARAM(SDNode *Node); + SDNode *SelectWRITEPARAM(SDNode *Node); + SDNode *SelectFrameIndex(SDNode *Node); + + bool isImm(const SDValue &operand); + bool SelectImm(const SDValue &operand, SDValue &imm); + + const PTXSubtarget& getSubtarget() const; +}; // class PTXDAGToDAGISel +} // namespace + +// createPTXISelDag - This pass converts a legalized DAG into a +// PTX-specific DAG, ready for instruction scheduling +FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXDAGToDAGISel(TM, OptLevel); +} + +PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + +SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { + switch (Node->getOpcode()) { + case ISD::BRCOND: + return SelectBRCOND(Node); + case PTXISD::READ_PARAM: + return SelectREADPARAM(Node); + case PTXISD::WRITE_PARAM: + return SelectWRITEPARAM(Node); + case ISD::FrameIndex: + return SelectFrameIndex(Node); + default: + return SelectCode(Node); + } +} + +SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { + assert(Node->getNumOperands() >= 3); + + SDValue Chain = Node->getOperand(0); + SDValue Pred = Node->getOperand(1); + SDValue Target = Node->getOperand(2); // branch target + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + assert(Target.getOpcode() == ISD::BasicBlock); + assert(Pred.getValueType() == MVT::i1); + + // Emit BRAdp + SDValue Ops[] = { Target, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); +} + +SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDValue Index = Node->getOperand(1); + + int OpCode; + + // Get the type of parameter we are reading + EVT VT = Node->getValueType(0); + assert(VT.isSimple() && "READ_PARAM only implemented for MVT types"); 
+ + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::READPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::READPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::READPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::READPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::READPARAMF32; + else { + assert(Type == MVT::f64 && "Unexpected type!"); + OpCode = PTX::READPARAMF64; + } + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Index, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4); +} + +SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { + + SDValue Chain = Node->getOperand(0); + SDValue Value = Node->getOperand(1); + + int OpCode; + + //Node->dumpr(CurDAG); + + // Get the type of parameter we are writing + EVT VT = Value->getValueType(0); + assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types"); + + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::WRITEPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::WRITEPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::WRITEPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::WRITEPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::WRITEPARAMF32; + else if (Type == MVT::f64) + OpCode = PTX::WRITEPARAMF64; + else + llvm_unreachable("Invalid type in SelectWRITEPARAM"); + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Value, Pred, PredOp, Chain }; + SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4); + + //dbgs() << "SelectWRITEPARAM produced:\n\t"; + //Ret->dumpr(CurDAG); + + return Ret; +} + +SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { + int FI = 
cast<FrameIndexSDNode>(Node)->getIndex(); + //dbgs() << "Selecting FrameIndex at index " << FI << "\n"; + //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); + + PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + + SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI), + Node->getValueType(0)); + + return FrameSymbol.getNode(); +} + +// Match memory operand of the form [reg+reg] +bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { + if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || + isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) + return false; + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; +} + +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + // FrameIndex addresses are handled separately + //errs() << "SelectADDRri: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + //errs() << "Failure\n"; + return false; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Success\n"; + return true; + } + + /*if (Addr.getNumOperands() == 1) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + errs() << "Success\n"; + return true; + }*/ + + //errs() << "SelectADDRri fails on: "; + //Addr.getNode()->dumpr(); + + if (isImm(Addr)) { + //errs() << "Failure\n"; + return false; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + //errs() << "Success\n"; + return true; + + 
/*if (Addr.getOpcode() != ISD::ADD) { + // let SelectADDRii handle the [imm] case + if (isImm(Addr)) + return false; + // it is [reg] + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; + } + + if (Addr.getNumOperands() < 2) + return false; + + // let SelectADDRii handle the [imm+imm] case + if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) + return false; + + // try [reg+imm] and [imm+reg] + for (int i = 0; i < 2; i ++) + if (SelectImm(Addr.getOperand(1-i), Offset)) { + Base = Addr.getOperand(i); + return true; + } + + // neither [reg+imm] nor [imm+reg] + return false;*/ +} + +// Match memory operand of the form [imm+imm] and [imm] +bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + // is [imm+imm]? + if (Addr.getOpcode() == ISD::ADD) { + return SelectImm(Addr.getOperand(0), Base) && + SelectImm(Addr.getOperand(1), Offset); + } + + // is [imm]? 
+ if (SelectImm(Addr, Base)) { + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; + } + + return false; +} + +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + //errs() << "SelectADDRlocal: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + //errs() << "Success\n"; + return true; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (!isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Offset: "; + //Offset.getNode()->dumpr(); + //errs() << "Success\n"; + return true; + } + + //errs() << "Failure\n"; + return false; +} + +bool PTXDAGToDAGISel::isImm(const SDValue &operand) { + return ConstantSDNode::classof(operand.getNode()); +} + +bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { + SDNode *node = operand.getNode(); + if (!ConstantSDNode::classof(node)) + return false; + + ConstantSDNode *CN = cast<ConstantSDNode>(node); + imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), + operand.getValueType()); + return true; +} + +const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const +{ + return TM.getSubtarget<PTXSubtarget>(); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp new file mode 100644 index 0000000..ef4455b --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -0,0 +1,522 @@ +//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file 
is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXISelLowering.h"
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+// Registers the PTX register classes for i1/i16/i32/i64/f32/f64 and declares
+// which operations are expanded, legal, or custom-lowered.
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+  // Set up the register classes.
+  addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+  addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+  addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+  addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+  addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+  addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+  setMinFunctionAlignment(2);
+
+  // Let LLVM use loads/stores for all mem* operations
+  maxStoresPerMemcpy = 4096;
+  maxStoresPerMemmove = 4096;
+  maxStoresPerMemset = 4096;
+
+  ////////////////////////////////////
+  /////////// Expansion //////////////
+  ////////////////////////////////////
+
+  // (any/zero/sign) extload => load + (any/zero/sign) extend
+
+  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+  // f32 extload => load + fextend
+
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+  // f64 truncstore => trunc + store
+
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // sign_extend_inreg => sign_extend
+
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // br_cc => brcond
+
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+  // select_cc => setcc
+
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+  ////////////////////////////////////
+  //////////// Legal /////////////////
+  ////////////////////////////////////
+
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+  ////////////////////////////////////
+  //////////// Custom ////////////////
+  ////////////////////////////////////
+
+  // customise setcc to use bitwise logic if possible
+  // (currently disabled: SETCC on i1 is marked Legal, not Custom)
+
+  //setOperationAction(ISD::SETCC, MVT::i1, Custom);
+  setOperationAction(ISD::SETCC, MVT::i1, Legal);
+
+  // customize translation of memory addresses
+
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+  // Compute derived properties from the register classes
+  computeRegisterProperties();
+}
+
+// SETCC always produces a 1-bit (predicate) result, whatever the operand type.
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
+  return MVT::i1;
+}
+
+// Dispatches custom lowering by opcode. Only SETCC and GlobalAddress are
+// handled; any other opcode is a programming error.
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+    default:
+      llvm_unreachable("Unimplemented operand");
+    case ISD::SETCC:
+      return LowerSETCC(Op, DAG);
+    case ISD::GlobalAddress:
+      return LowerGlobalAddress(Op, DAG);
+  }
+}
+
+// Returns the printable name of a PTXISD node opcode.
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (Opcode) {
+    default:
+      llvm_unreachable("Unknown opcode");
+    case PTXISD::COPY_ADDRESS:
+      return "PTXISD::COPY_ADDRESS";
+    case PTXISD::LOAD_PARAM:
+      return "PTXISD::LOAD_PARAM";
+    case PTXISD::STORE_PARAM:
+      return "PTXISD::STORE_PARAM";
+    case PTXISD::READ_PARAM:
+      return "PTXISD::READ_PARAM";
+    case PTXISD::WRITE_PARAM:
+      return "PTXISD::WRITE_PARAM";
+    case PTXISD::EXIT:
+      return "PTXISD::EXIT";
+    case PTXISD::RET:
+      return "PTXISD::RET";
+    case PTXISD::CALL:
+      return "PTXISD::CALL";
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+// Custom lowering hook for SETCC. The constructor marks SETCC on i1 as Legal
+// rather than Custom, and the bitwise-logic simplifications below are
+// commented out, so reaching this function is treated as an error. The return
+// statement after llvm_unreachable is never executed.
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op2 = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+  //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+  // Look for X == 0, X == 1, X != 0, or X != 1
+  // We can simplify these to bitwise logic
+
+  //if (Op1.getOpcode() == ISD::Constant &&
+  //    (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+  //     cast<ConstantSDNode>(Op1)->isNullValue()) &&
+  //    (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+  //
+  //  return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+  //}
+
+  //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1);
+  //if(COp1 && COp1->getZExtValue() == 1) {
+  //  if(CC == ISD::SETNE) {
+  //    return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0);
+  //  }
+  //}
+
+  llvm_unreachable("setcc was not matched by a pattern!");
+
+  return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
+// Lowers a GlobalAddress node to a COPY_ADDRESS node wrapping the
+// corresponding target global address, typed as the target pointer type.
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+  assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+  SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+  SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+                                 dl,
+                                 PtrVT.getSimpleVT(),
+                                 targetGlobal);
+
+  return movInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+// Lowers incoming arguments. Records whether the function is a kernel, then
+// produces one value per argument in InVals: a LOAD_PARAM from a .param
+// variable (kernels, or devices that pass device arguments in param space) or
+// a READ_PARAM indexed by argument position otherwise. Varargs and calling
+// conventions other than PTX_Kernel/PTX_Device are rejected.
+SDValue PTXTargetLowering::
+  LowerFormalArguments(SDValue Chain,
+                       CallingConv::ID CallConv,
+                       bool isVarArg,
+                       const SmallVectorImpl<ISD::InputArg> &Ins,
+                       DebugLoc dl,
+                       SelectionDAG &DAG,
+                       SmallVectorImpl<SDValue> &InVals) const {
+  if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+  PTXParamManager &PM = MFI->getParamManager();
+
+  switch (CallConv) {
+    default:
+      llvm_unreachable("Unsupported calling convention");
+    case CallingConv::PTX_Kernel:
+      MFI->setKernel(true);
+      break;
+    case CallingConv::PTX_Device:
+      MFI->setKernel(false);
+      break;
+  }
+
+  // We do one of two things here:
+  // IsKernel || SM >= 2.0  ->  Use param space for arguments
+  // SM < 2.0               ->  Use registers for arguments
+  if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+    // We just need to emit the proper LOAD_PARAM ISDs
+    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+      assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+             "Kernels cannot take pred operands");
+
+      unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+      unsigned Param = PM.addArgumentParam(ParamSize);
+      const std::string &ParamName = PM.getParamName(Param);
+      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+                                                       MVT::Other);
+      SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+                                     ParamValue);
+      InVals.push_back(ArgValue);
+    }
+  }
+  else {
+    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+      EVT RegVT = Ins[i].VT;
+      const TargetRegisterClass* TRC = getRegClassFor(RegVT);
+      unsigned RegType;
+
+      // Determine which register class we need
+      if (RegVT == MVT::i1)
+        RegType = PTXRegisterType::Pred;
+      else if (RegVT == MVT::i16)
+        RegType = PTXRegisterType::B16;
+      else if (RegVT == MVT::i32)
+        RegType = PTXRegisterType::B32;
+      else if (RegVT == MVT::i64)
+        RegType = PTXRegisterType::B64;
+      else if (RegVT == MVT::f32)
+        RegType = PTXRegisterType::F32;
+      else if (RegVT == MVT::f64)
+        RegType = PTXRegisterType::F64;
+      else
+        llvm_unreachable("Unknown parameter type");
+
+      // Use a unique index in the instruction to prevent instruction folding.
+      // Yes, this is a hack.
+      SDValue Index = DAG.getTargetConstant(i, MVT::i32);
+      unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+      SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+                                     Index);
+
+      InVals.push_back(ArgValue);
+
+      MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
+    }
+  }
+
+  return Chain;
+}
+
+// Lowers a function return. Kernels must return void and become EXIT; device
+// functions may return at most one value, which is stored to a return .param
+// variable (param-space devices) or written through WRITE_PARAM from a fresh
+// virtual register, followed by RET.
+SDValue PTXTargetLowering::
+  LowerReturn(SDValue Chain,
+              CallingConv::ID CallConv,
+              bool isVarArg,
+              const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
+              DebugLoc dl,
+              SelectionDAG &DAG) const {
+  if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+  switch (CallConv) {
+    default:
+      llvm_unreachable("Unsupported calling convention.");
+    case CallingConv::PTX_Kernel:
+      assert(Outs.size() == 0 && "Kernel must return void.");
+      return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+    case CallingConv::PTX_Device:
+      assert(Outs.size() <= 1 && "Can at most return one value.");
+      break;
+  }
+
+  MachineFunction& MF = DAG.getMachineFunction();
+  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+  PTXParamManager &PM = MFI->getParamManager();
+
+  // Flag is never assigned below, so the glue-less RET form is always emitted.
+  SDValue Flag;
+  const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+
+  if (ST.useParamSpaceForDeviceArgs()) {
+    assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+    if (Outs.size() == 1) {
+      unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+      unsigned Param = PM.addReturnParam(ParamSize);
+      const std::string &ParamName = PM.getParamName(Param);
+      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+                                                       MVT::Other);
+      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+                          ParamValue, OutVals[0]);
+    }
+  } else {
+    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+      EVT RegVT = Outs[i].VT;
+      const TargetRegisterClass* TRC;
+      unsigned RegType;
+
+      // Determine which register class we need
+      if (RegVT == MVT::i1) {
+        TRC = PTX::RegPredRegisterClass;
+        RegType = PTXRegisterType::Pred;
+      }
+      else if (RegVT == MVT::i16) {
+        TRC = PTX::RegI16RegisterClass;
+        RegType = PTXRegisterType::B16;
+      }
+      else if (RegVT == MVT::i32) {
+        TRC = PTX::RegI32RegisterClass;
+        RegType = PTXRegisterType::B32;
+      }
+      else if (RegVT == MVT::i64) {
+        TRC = PTX::RegI64RegisterClass;
+        RegType = PTXRegisterType::B64;
+      }
+      else if (RegVT == MVT::f32) {
+        TRC = PTX::RegF32RegisterClass;
+        RegType = PTXRegisterType::F32;
+      }
+      else if (RegVT == MVT::f64) {
+        TRC = PTX::RegF64RegisterClass;
+        RegType = PTXRegisterType::F64;
+      }
+      else {
+        llvm_unreachable("Unknown parameter type");
+      }
+
+      unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+
+      SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+      SDValue OutReg = DAG.getRegister(Reg, RegVT);
+
+      Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
+
+      MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
+    }
+  }
+
+  if (Flag.getNode() == 0) {
+    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+  }
+  else {
+    return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+  }
+}
+
+// Lowers a call to a PTX device function (or to printf/puts, which get a
+// dedicated two-parameter form: format string address plus the address of a
+// stack buffer holding the remaining arguments). Outgoing arguments are stored
+// into local .param variables, a CALL node is built with operand layout
+// [Chain, #Ins, Ins, Callee, #Outs, Outs], and return values are read back
+// with LOAD_PARAM nodes appended to InVals.
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                             CallingConv::ID CallConv, bool isVarArg,
+                             bool doesNotRet, bool &isTailCall,
+                             const SmallVectorImpl<ISD::OutputArg> &Outs,
+                             const SmallVectorImpl<SDValue> &OutVals,
+                             const SmallVectorImpl<ISD::InputArg> &Ins,
+                             DebugLoc dl, SelectionDAG &DAG,
+                             SmallVectorImpl<SDValue> &InVals) const {
+
+  MachineFunction& MF = DAG.getMachineFunction();
+  PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
+  PTXParamManager &PM = PTXMFI->getParamManager();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+         "Calls are not handled for the target device");
+
+  // Identify the callee function
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+  const Function *function = cast<Function>(GV);
+
+  // allow non-device calls only for printf
+  bool isPrintf = function->getName() == "printf" || function->getName() == "puts";
+
+  assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
+         "PTX function calls must be to PTX device functions");
+
+  unsigned outSize = isPrintf ? 2 : Outs.size();
+
+  std::vector<SDValue> Ops;
+  // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+  Ops.resize(outSize + Ins.size() + 4);
+
+  Ops[0] = Chain;
+
+  // Identify the callee function
+  Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+  Ops[Ins.size()+2] = Callee;
+
+  // #Outs
+  Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
+
+  if (isPrintf) {
+    // first argument is the address of the global string variable in memory
+    unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
+    SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
+                                                      MVT::Other);
+    Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+                        ParamValue0, OutVals[0]);
+    Ops[Ins.size()+4] = ParamValue0;
+
+    // alignment is the maximum size (in bits) of all the arguments
+    unsigned alignment = 0;
+    for (unsigned i = 1; i < OutVals.size(); ++i) {
+      alignment = std::max(alignment,
+                           OutVals[i].getValueType().getSizeInBits());
+    }
+
+    // size (in bits) is the alignment multiplied by the number of arguments
+    unsigned size = alignment * (OutVals.size() - 1);
+
+    // second argument is the address of the stack object (unless no arguments)
+    unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
+    SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
+                                                      MVT::Other);
+    Ops[Ins.size()+5] = ParamValue1;
+
+    if (size > 0)
+    {
+      // create a local stack object to store the arguments
+      unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
+      SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
+
+      // Each argument occupies one slot of alignment/8 bytes, matching the
+      // size the stack object was created with. A fixed 8-byte stride would
+      // store past the end of the object whenever the widest argument is
+      // narrower than 64 bits.
+      const unsigned slotBytes = alignment / 8;
+
+      // store each of the arguments to the stack in turn
+      for (unsigned int i = 1; i != OutVals.size(); i++) {
+        SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex,
+                                        DAG.getTargetConstant((i - 1) * slotBytes,
+                                                              getPointerTy()));
+        Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
+                             MachinePointerInfo(),
+                             false, false, 0);
+      }
+
+      // copy the address of the local frame index to get the address in non-local space
+      SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
+
+      // store this address in the second argument
+      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
+    }
+  }
+  else
+  {
+    // Generate STORE_PARAM nodes for each function argument. In PTX, function
+    // arguments are explicitly stored into .param variables and passed as
+    // arguments. There is no register/stack-based calling convention in PTX.
+    for (unsigned i = 0; i != OutVals.size(); ++i) {
+      unsigned Size = OutVals[i].getValueType().getSizeInBits();
+      unsigned Param = PM.addLocalParam(Size);
+      const std::string &ParamName = PM.getParamName(Param);
+      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+                                                       MVT::Other);
+      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+                          ParamValue, OutVals[i]);
+      Ops[i+Ins.size()+4] = ParamValue;
+    }
+  }
+
+  std::vector<SDValue> InParams;
+
+  // Generate list of .param variables to hold the return value(s).
+  Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    unsigned Size = Ins[i].VT.getStoreSizeInBits();
+    unsigned Param = PM.addLocalParam(Size);
+    const std::string &ParamName = PM.getParamName(Param);
+    SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+                                                     MVT::Other);
+    Ops[i+2] = ParamValue;
+    InParams.push_back(ParamValue);
+  }
+
+  // Refresh the chain operand: the STORE_PARAM nodes above extended it.
+  Ops[0] = Chain;
+
+  // Create the CALL node.
+  Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+  // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+  for (unsigned i = 0; i < Ins.size(); ++i) {
+    SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+                               InParams[i]);
+    InVals.push_back(Load);
+  }
+
+  return Chain;
+}
+
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+  // All arguments consist of one "register," regardless of the type.
+  return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 0000000..33220f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,82 @@
+//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+// PTX-specific SelectionDAG node opcodes, numbered after the generic ISD
+// opcodes.
+namespace PTXISD {
+  enum NodeType {
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+    LOAD_PARAM,
+    STORE_PARAM,
+    READ_PARAM,
+    WRITE_PARAM,
+    EXIT,
+    RET,
+    COPY_ADDRESS,
+    CALL
+  };
+} // namespace PTXISD
+
+// Describes how LLVM IR is lowered to SelectionDAG nodes for the PTX target:
+// custom operation lowering plus the argument/return/call conventions.
+class PTXTargetLowering : public TargetLowering {
+  public:
+    explicit PTXTargetLowering(TargetMachine &TM);
+
+    // Returns the printable name of a PTXISD opcode.
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    // Entry point for operations marked Custom.
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+    virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+    // Lowers incoming formal arguments; results are appended to InVals.
+    virtual SDValue
+      LowerFormalArguments(SDValue Chain,
+                           CallingConv::ID CallConv,
+                           bool isVarArg,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl,
+                           SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals) const;
+
+    // Lowers a function return with the given outgoing values.
+    virtual SDValue
+      LowerReturn(SDValue Chain,
+                  CallingConv::ID CallConv,
+                  bool isVarArg,
+                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  const SmallVectorImpl<SDValue> &OutVals,
+                  DebugLoc dl,
+                  SelectionDAG &DAG) const;
+
+    // Lowers a call; values returned by the callee are appended to InVals.
+    virtual SDValue
+      LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+                bool isVarArg, bool doesNotRet, bool &isTailCall,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
+                const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals) const;
+
+    // Value type produced by SETCC for operands of type VT.
+    virtual EVT getSetCCResultType(EVT VT) const;
+
+    // NOTE(review): this member is not const-qualified; confirm it matches
+    // the signature of the TargetLowering member it is meant to replace.
+    virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
+
+  private:
+    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 0000000..267e834
--- /dev/null
+++
b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,51 @@
+//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Rounding Mode Specifier (disabled; kept for reference)
+/*class RoundingMode<bits<3> val> {
+  bits<3> Value = val;
+}
+
+def RndDefault     : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf      : RoundingMode<3>;
+def RndPosInf      : RoundingMode<4>;
+def RndApprox      : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+  let PrintMethod = "printRoundingMode";
+}
+
+// Pattern leaf matching the i32 constant 0 (the default rounding mode).
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand: a predicate register plus an i32 immediate. The
+// default operands are (zero_reg, 2).
+// NOTE(review): the immediate 2 presumably encodes PTXPredicate::None (no
+// predication) -- confirm against the PTXPredicate enum.
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
+                            (ops (i1 zero_reg), (i32 2))>;
+
+def RndModeOperand : Operand<OtherVT> {
+  let MIOperandInfo = (ops i32imm);
+}
+
+// Instruction Types
+let Namespace = "PTX" in {
+
+  // Base class for PTX instructions. Every instruction is predicable and gets
+  // the predicate operand $_p appended to its input operand list.
+  class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+    : Instruction {
+      dag OutOperandList = oops;
+      dag InOperandList = !con(iops, (ins pred:$_p));
+      let AsmString = asmstr; // Predicate printing is defined elsewhere.
+      let Pattern = pattern;
+      let isPredicable = 1;
+  }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 0000000..443cd54
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,359 @@
+//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-instrinfo"
+
+#include "PTXInstrInfo.h"
+#include "PTX.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PTXGenInstrInfo.inc"
+
+using namespace llvm;
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+  : PTXGenInstrInfo(),
+    RI(_TM, *this), TM(_TM) {}
+
+// Register class -> register-to-register move opcode table used by the copy
+// helpers below.
+static const struct map_entry {
+  const TargetRegisterClass *cls;
+  const int opcode;
+} map[] = {
+  { &PTX::RegI16RegClass, PTX::MOVU16rr },
+  { &PTX::RegI32RegClass, PTX::MOVU32rr },
+  { &PTX::RegI64RegClass, PTX::MOVU64rr },
+  { &PTX::RegF32RegClass, PTX::MOVF32rr },
+  { &PTX::RegF64RegClass, PTX::MOVF64rr },
+  { &PTX::RegPredRegClass, PTX::MOVPREDrr }
+};
+
+// Emits a move from SrcReg to DstReg before I, choosing the opcode from the
+// register class of DstReg, and appends the default (empty) predicate.
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator I, DebugLoc DL,
+                               unsigned DstReg, unsigned SrcReg,
+                               bool KillSrc) const {
+
+  const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+  //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+  //  "Invalid register copy between two register classes");
+
+  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+    if (map[i].cls == MRI.getRegClass(DstReg)) {
+      const MCInstrDesc &MCID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
+        addReg(SrcReg, getKillRegState(KillSrc));
+      AddDefaultPredicate(MI);
+      return;
+    }
+  }
+
+  llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+// Emits a move between two registers of the same class. Returns false when
+// the classes differ or no move opcode is known for the class.
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I,
+                                unsigned DstReg, unsigned SrcReg,
+                                const TargetRegisterClass *DstRC,
+                                const TargetRegisterClass *SrcRC,
+                                DebugLoc DL) const {
+  if (DstRC != SrcRC)
+    return false;
+
+  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+    if (DstRC == map[i].cls) {
+      const MCInstrDesc &MCID = get(map[i].opcode);
+      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
+      AddDefaultPredicate(MI);
+      return true;
+    }
+
+  return false;
+}
+
+// Recognizes the register-to-register move opcodes and reports their source
+// and destination registers (sub-register indices are always 0).
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+                               unsigned &SrcReg, unsigned &DstReg,
+                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+  switch (MI.getOpcode()) {
+    default:
+      return false;
+    case PTX::MOVU16rr:
+    case PTX::MOVU32rr:
+    case PTX::MOVU64rr:
+    case PTX::MOVF32rr:
+    case PTX::MOVF64rr:
+    case PTX::MOVPREDrr:
+      assert(MI.getNumOperands() >= 2 &&
+             MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+             "Invalid register-register move instruction");
+      SrcSubIdx = DstSubIdx = 0; // No sub-registers
+      DstReg = MI.getOperand(0).getReg();
+      SrcReg = MI.getOperand(1).getReg();
+      return true;
+  }
+}
+
+// predicate support
+
+// An instruction is predicated when it has a predicate operand whose register
+// is not NoRegister.
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+  int i = MI->findFirstPredOperandIdx();
+  return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  return !isPredicated(MI) && MI->isTerminator();
+}
+
+// Overwrites MI's predicate operands with Pred[0] (register) and Pred[1]
+// (immediate).
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  if (Pred.size() < 2)
+    llvm_unreachable("lesser than 2 predicate operands are provided");
+
+  int i = MI->findFirstPredOperandIdx();
+  if (i == -1)
+    llvm_unreachable("missing predicate operand");
+
+  MI->getOperand(i).setReg(Pred[0].getReg());
+  MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+  return true;
+}
+
+// Two predicates subsume each other only when both the register and the
+// immediate are identical.
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                  const SmallVectorImpl<MachineOperand> &Pred2) const {
+  const MachineOperand &PredReg1 = Pred1[0];
+  const MachineOperand &PredReg2 = Pred2[0];
+  if (PredReg1.getReg() != PredReg2.getReg())
+    return false;
+
+  const MachineOperand &PredOp1 = Pred1[1];
+  const MachineOperand &PredOp2 = Pred2[1];
+  if (PredOp1.getImm() != PredOp2.getImm())
+    return false;
+
+  return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+                 std::vector<MachineOperand> &Pred) const {
+  // If an instruction sets a predicate register, it defines a predicate.
+
+  // TODO support 5-operand format of setp instruction
+
+  if (MI->getNumOperands() < 1)
+    return false;
+
+  const MachineOperand &MO = MI->getOperand(0);
+
+  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
+    return false;
+
+  Pred.push_back(MO);
+  Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
+  return true;
+}
+
+// branch support
+
+// Analyzes the branches at the end of MBB. Returns false when the block's
+// terminators were understood (TBB/FBB/Cond are filled in as applicable) and
+// true when they could not be analyzed.
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+              MachineBasicBlock *&TBB,
+              MachineBasicBlock *&FBB,
+              SmallVectorImpl<MachineOperand> &Cond,
+              bool AllowModify) const {
+  // TODO implement cases when AllowModify is true
+
+  if (MBB.empty())
+    return true;
+
+  MachineBasicBlock::iterator iter = MBB.end();
+  const MachineInstr& instLast1 = *--iter;
+  // for special case that MBB has only 1 instruction
+  const bool IsSizeOne = MBB.size() == 1;
+  // if IsSizeOne is true, *--iter and instLast2 are invalid
+  // we put a dummy value in instLast2 and desc2 since they are used
+  const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+
+  DEBUG(dbgs() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+  DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+  // this block ends with no branches
+  if (!IsAnyKindOfBranch(instLast1)) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+    return false;
+  }
+
+  // this block ends with only an unconditional branch
+  if (instLast1.isUnconditionalBranch() &&
+      // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
+      (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+    TBB = GetBranchTarget(instLast1);
+    return false;
+  }
+
+  // this block ends with a conditional branch and
+  // it falls through to a successor block
+  if (instLast1.isConditionalBranch() &&
+      IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+    TBB = GetBranchTarget(instLast1);
+    int i = instLast1.findFirstPredOperandIdx();
+    Cond.push_back(instLast1.getOperand(i));
+    Cond.push_back(instLast1.getOperand(i+1));
+    return false;
+  }
+
+  // when IsSizeOne is true, we are done
+  if (IsSizeOne)
+    return true;
+
+  // this block ends with a conditional branch
+  // followed by an unconditional branch
+  if (instLast2.isConditionalBranch() &&
+      instLast1.isUnconditionalBranch()) {
+    DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+    TBB = GetBranchTarget(instLast2);
+    FBB = GetBranchTarget(instLast1);
+    int i = instLast2.findFirstPredOperandIdx();
+    Cond.push_back(instLast2.getOperand(i));
+    Cond.push_back(instLast2.getOperand(i+1));
+    return false;
+  }
+
+  // branch cannot be understood
+  DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+  return true;
+}
+
+// Pops branch instructions off the end of MBB and returns how many were
+// removed.
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  unsigned count = 0;
+  while (!MBB.empty())
+    if (IsAnyKindOfBranch(MBB.back())) {
+      MBB.pop_back();
+      ++count;
+    } else
+      break;
+  DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+  DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+  return count;
+}
+
+// Appends branch instructions to MBB: a conditional branch to TBB followed by
+// an unconditional branch to FBB when FBB is given; otherwise a single
+// conditional (Cond non-empty) or unconditional branch to TBB. Returns the
+// number of instructions inserted.
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+             MachineBasicBlock *TBB,
+             MachineBasicBlock *FBB,
+             const SmallVectorImpl<MachineOperand> &Cond,
+             DebugLoc DL) const {
+  DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+  DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+                        << "\n";
+        else     dbgs() << "InsertBranch: TBB: (NULL)\n");
+  DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+                        << "\n";
+        else     dbgs() << "InsertBranch: FBB: (NULL)\n");
+  DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+  assert(TBB && "TBB is NULL");
+
+  if (FBB) {
+    BuildMI(&MBB, DL, get(PTX::BRAdp))
+      .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+    BuildMI(&MBB, DL, get(PTX::BRAd))
+      .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+    return 2;
+  } else if (Cond.size()) {
+    BuildMI(&MBB, DL, get(PTX::BRAdp))
+      .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+    return 1;
+  } else {
+    BuildMI(&MBB, DL, get(PTX::BRAd))
+      .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+    return 1;
+  }
+}
+
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MII,
+                                       unsigned SrcReg, bool isKill, int FrameIdx,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  llvm_unreachable("storeRegToStackSlot should not be called for PTX");
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MII,
+                                        unsigned DestReg, int FrameIdx,
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
+  llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
+}
+
+// static helper routines
+
+// Builds a machine node for Opcode with one operand followed by the default
+// predicate operands (NoRegister, PTXPredicate::None).
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                  DebugLoc dl, EVT VT, SDValue Op1) {
+  SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+  SDValue ops[] = { Op1, predReg, predOp };
+  return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+// Two-operand variant of the helper above.
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                  DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+  SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+  SDValue ops[] = { Op1, Op2, predReg, predOp };
+  return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+// Appends the default predicate operands to MI unless it already has a
+// predicate operand.
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+  if (MI->findFirstPredOperandIdx() == -1) {
+    MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+    MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
+  }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+  return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
+}
+
+// Returns true if any CFG successor of MBB is also the block that follows it
+// in layout order.
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+  for (MachineBasicBlock::const_succ_iterator
+      i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+    // *i is the successor block pointer itself. Taking &*i would yield the
+    // address of the slot inside the successor list, which never compares
+    // equal to a real block.
+    if (MBB.isLayoutSuccessor(*i))
+      return true;
+  return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+  // FIXME So far all branch instructions put destination in 1st operand
+  const MachineOperand& target = inst.getOperand(0);
+  assert(target.isMBB() && "FIXME: detect branch target operand");
+  return target.getMBB();
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode
100644 index 0000000..fba89c0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h @@ -0,0 +1,133 @@
//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#ifndef PTX_INSTR_INFO_H
#define PTX_INSTR_INFO_H

#include "PTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"

// Pull in the TableGen-generated declaration of PTXGenInstrInfo.
#define GET_INSTRINFO_HEADER
#include "PTXGenInstrInfo.inc"

namespace llvm {
class PTXTargetMachine;

class MachineSDNode;
class SDValue;
class SelectionDAG;

/// PTXInstrInfo - Instruction information for the PTX target: copy,
/// predication and branch-analysis hooks, plus a handful of static helpers
/// for building predicated PTX nodes and instructions.
class PTXInstrInfo : public PTXGenInstrInfo {
private:
  const PTXRegisterInfo RI;   // register info exposed via getRegisterInfo()
  PTXTargetMachine &TM;

public:
  explicit PTXInstrInfo(PTXTargetMachine &_TM);

  /// Accessor for the register info object owned by this class.
  virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }

  virtual void copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, DebugLoc DL,
                           unsigned DstReg, unsigned SrcReg,
                           bool KillSrc) const;

  virtual bool copyRegToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I,
                            unsigned DstReg, unsigned SrcReg,
                            const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC,
                            DebugLoc DL) const;

  virtual bool isMoveInstr(const MachineInstr& MI,
                           unsigned &SrcReg, unsigned &DstReg,
                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;

  // predicate support

  virtual bool isPredicated(const MachineInstr *MI) const;

  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;

  virtual
  bool PredicateInstruction(MachineInstr *MI,
                            const SmallVectorImpl<MachineOperand> &Pred) const;

  virtual
  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                         const SmallVectorImpl<MachineOperand> &Pred2) const;

  virtual bool DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const;

  // PTX is fully-predicable
  virtual bool isPredicable(MachineInstr *MI) const { return true; }

  // branch support

  /// Classify the branch(es) ending \p MBB. On success (return value false)
  /// TBB/FBB/Cond describe them; Cond, when filled, holds the predicate
  /// register operand followed by the predicate-op operand. Returns true when
  /// the terminators cannot be understood.
  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify = false) const;

  /// Remove the trailing branch instructions of \p MBB and return how many
  /// were removed.
  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;

  /// Append one or two branch instructions to \p MBB and return the number
  /// of instructions inserted. \p TBB must be non-null.
  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
                                const SmallVectorImpl<MachineOperand> &Cond,
                                DebugLoc DL) const;

  // Memory operand folding for spills
  // TODO: Implement this eventually and get rid of storeRegToStackSlot and
  // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
  // we currently use to spill, though I doubt the overall effect on ptxas
  // output will be large. I have yet to see a case where ptxas is unable
  // to see through the "stack" register usage and hence generates
  // efficient code anyway.
  // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
  //                                             MachineInstr* MI,
  //                                       const SmallVectorImpl<unsigned> &Ops,
  //                                             int FrameIndex) const;

  // NOTE(review): the implementations of the two hooks below abort with
  // llvm_unreachable, which seems at odds with the TODO above describing
  // "stack" register spilling -- confirm which is current.
  virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
                                   MachineBasicBlock::iterator MII,
                                   unsigned SrcReg, bool isKill, int FrameIndex,
                                   const TargetRegisterClass* RC,
                                   const TargetRegisterInfo* TRI) const;
  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MII,
                                    unsigned DestReg, int FrameIdx,
                                    const TargetRegisterClass *RC,
                                    const TargetRegisterInfo *TRI) const;

  // static helper routines

  /// Build a machine node with operand \p Op1 followed by the default
  /// (PTX::NoRegister, PTXPredicate::None) predicate operands.
  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                                          DebugLoc dl, EVT VT,
                                          SDValue Op1);

  /// Same as above with two leading operands.
  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                                          DebugLoc dl, EVT VT,
                                          SDValue Op1, SDValue Op2);

  /// Append the default predicate operands to \p MI if it has none.
  static void AddDefaultPredicate(MachineInstr *MI);

  /// True for terminators, branches and indirect branches.
  static bool IsAnyKindOfBranch(const MachineInstr& inst);

  /// True when some successor of \p MBB is also its layout successor.
  static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);

  /// Return the block held in operand 0 of branch instruction \p inst.
  static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
}; // class PTXInstrInfo
} // namespace llvm

#endif // PTX_INSTR_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td new file mode 100644 index 0000000..bead428 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td @@ -0,0 +1,1031 @@
//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//

include "PTXInstrFormats.td"

//===----------------------------------------------------------------------===//
// Code Generation Predicates
//===----------------------------------------------------------------------===//

// Each predicate below comes as a positive/negative pair wrapping a single
// subtarget query, so mutually exclusive patterns can be guarded by one or
// the other.

// Shader Model Support
def FDivNeedsRoundingMode :
  Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
def FDivNoRoundingMode :
  Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
def FMadNeedsRoundingMode :
  Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
def FMadNoRoundingMode :
  Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;

// PTX Version Support
def SupportsPTX21       : Predicate<"getSubtarget().supportsPTX21()">;
def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
def SupportsPTX22       : Predicate<"getSubtarget().supportsPTX22()">;
def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
def SupportsPTX23       : Predicate<"getSubtarget().supportsPTX23()">;
def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;

// Fused-Multiply Add
def SupportsFMA         : Predicate<"getSubtarget().supportsFMA()">;
def DoesNotSupportFMA   : Predicate<"!getSubtarget().supportsFMA()">;



// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
// def SDT_PTXCallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;

// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
//                        [SDNPHasChain, SDNPOutGlue]>;
// def PTXcallseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
//                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Call node: chained, variadic, with optional incoming and outgoing glue.
def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
    [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;


// Branch targets have OtherVT type; call targets are declared as i32.
def brtarget   : Operand<OtherVT>;
def calltarget : Operand<i32>;

//===----------------------------------------------------------------------===//
// PTX Specific Node Definitions
//===----------------------------------------------------------------------===//

// PTX allow generic 3-reg shifts like shl r0, r1, r2
def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;

def PTXexit
  : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
def PTXret
  : SDNode<"PTXISD::RET", SDTNone,
           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// One result, one operand, no type constraints.
def PTXcopyaddress
  : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;



//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//

// For floating-point instructions, we cannot just embed the pattern into the
// instruction definition since we need to muck around with the rounding mode,
// and I do not know how to insert constants into instructions directly from
// pattern matches.
//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
// One destination, one source. Suffix convention used by all multiclasses in
// this group: 'r' = register operand, 'i' = immediate operand, trailing
// number = bit width. The rounding-mode operand $r is printed directly after
// the opcode string. Selection patterns are left empty ([]).
multiclass PTX_FLOAT_2OP<string opcstr> {
  def rr32 : InstPTX<(outs RegF32:$d),
                     (ins RndMode:$r, RegF32:$a),
                     !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
  def ri32 : InstPTX<(outs RegF32:$d),
                     (ins RndMode:$r, f32imm:$a),
                     !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
  def rr64 : InstPTX<(outs RegF64:$d),
                     (ins RndMode:$r, RegF64:$a),
                     !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
  def ri64 : InstPTX<(outs RegF64:$d),
                     (ins RndMode:$r, f64imm:$a),
                     !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
}

//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
// One destination, two sources; only the second source may be an immediate.
multiclass PTX_FLOAT_3OP<string opcstr> {
  def rr32 : InstPTX<(outs RegF32:$d),
                     (ins RndMode:$r, RegF32:$a, RegF32:$b),
                     !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
  def ri32 : InstPTX<(outs RegF32:$d),
                     (ins RndMode:$r, RegF32:$a, f32imm:$b),
                     !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
  def rr64 : InstPTX<(outs RegF64:$d),
                     (ins RndMode:$r, RegF64:$a, RegF64:$b),
                     !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
  def ri64 : InstPTX<(outs RegF64:$d),
                     (ins RndMode:$r, RegF64:$a, f64imm:$b),
                     !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
}

//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
// One destination, three sources; the first source is always a register.
multiclass PTX_FLOAT_4OP<string opcstr> {
  def rrr32 : InstPTX<(outs RegF32:$d),
                      (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
                      !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
  def rri32 : InstPTX<(outs RegF32:$d),
                      (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
                      !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
  def rii32 : InstPTX<(outs RegF32:$d),
                      (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
                      !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
  def rrr64 : InstPTX<(outs RegF64:$d),
                      (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
                      !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
  def rri64 : InstPTX<(outs RegF64:$d),
                      (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
                      !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
  def rii64 : InstPTX<(outs RegF64:$d),
                      (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
                      !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
}

//===- Integer Instructions - 3 Operand Form ------------------------------===//
// Binary integer operation printed with an unsigned (.uNN) type suffix; each
// def carries its selection pattern for DAG node 'opnode'.
multiclass PTX_INT3<string opcstr, SDNode opnode> {
  def rr16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, RegI16:$b),
                     !strconcat(opcstr, ".u16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
  def ri16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, i16imm:$b),
                     !strconcat(opcstr, ".u16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
  def rr32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, RegI32:$b),
                     !strconcat(opcstr, ".u32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
  def ri32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, i32imm:$b),
                     !strconcat(opcstr, ".u32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
  def rr64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, RegI64:$b),
                     !strconcat(opcstr, ".u64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
  def ri64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, i64imm:$b),
                     !strconcat(opcstr, ".u64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}

//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
// Same shape as PTX_INT3 but printed with a signed (.sNN) type suffix.
multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
  def rr16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, RegI16:$b),
                     !strconcat(opcstr, ".s16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
  def ri16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, i16imm:$b),
                     !strconcat(opcstr, ".s16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
  def rr32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, RegI32:$b),
                     !strconcat(opcstr, ".s32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
  def ri32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, i32imm:$b),
                     !strconcat(opcstr, ".s32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
  def rr64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, RegI64:$b),
                     !strconcat(opcstr, ".s64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
  def ri64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, i64imm:$b),
                     !strconcat(opcstr, ".s64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}

//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
// Bitwise operation on predicate registers (.pred) and on 16/32/64-bit
// integer registers (.bNN).
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
  def ripreds : InstPTX<(outs RegPred:$d),
                        (ins RegPred:$a, i1imm:$b),
                        !strconcat(opcstr, ".pred\t$d, $a, $b"),
                        [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
  def rrpreds : InstPTX<(outs RegPred:$d),
                        (ins RegPred:$a, RegPred:$b),
                        !strconcat(opcstr, ".pred\t$d, $a, $b"),
                        [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
  def rr16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, RegI16:$b),
                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
  def ri16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, i16imm:$b),
                     !strconcat(opcstr, ".b16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
  def rr32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, RegI32:$b),
                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
  def ri32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, i32imm:$b),
                     !strconcat(opcstr, ".b32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
  def rr64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, RegI64:$b),
                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
  def ri64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, i64imm:$b),
                     !strconcat(opcstr, ".b64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}

//===- Integer Shift Instructions - 3 Operand Form
//------------------------===//
// Shift-style binary operation. The caller supplies the opcode including the
// type letter (e.g. "shl.b"); the multiclass appends only the bit width.
// Besides rr/ri forms this also provides 'ir' forms whose first source is an
// immediate.
multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
  def rr16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, RegI16:$b),
                     !strconcat(opcstr, "16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
  def rr32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, RegI32:$b),
                     !strconcat(opcstr, "32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
  def rr64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, RegI64:$b),
                     !strconcat(opcstr, "64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
  def ri16 : InstPTX<(outs RegI16:$d),
                     (ins RegI16:$a, i16imm:$b),
                     !strconcat(opcstr, "16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
  def ri32 : InstPTX<(outs RegI32:$d),
                     (ins RegI32:$a, i32imm:$b),
                     !strconcat(opcstr, "32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
  def ri64 : InstPTX<(outs RegI64:$d),
                     (ins RegI64:$a, i64imm:$b),
                     !strconcat(opcstr, "64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
  def ir16 : InstPTX<(outs RegI16:$d),
                     (ins i16imm:$a, RegI16:$b),
                     !strconcat(opcstr, "16\t$d, $a, $b"),
                     [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
  def ir32 : InstPTX<(outs RegI32:$d),
                     (ins i32imm:$a, RegI32:$b),
                     !strconcat(opcstr, "32\t$d, $a, $b"),
                     [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
  def ir64 : InstPTX<(outs RegI64:$d),
                     (ins i64imm:$a, RegI64:$b),
                     !strconcat(opcstr, "64\t$d, $a, $b"),
                     [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
}

//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
// Integer compare into a predicate register. The plain rr/ri forms match a
// bare setcc. The *_and_r/_or_r/_xor_r forms fold a following and/or/xor with
// predicate $c into the setp, and the *_not_r forms do the same with a
// negated $c (printed as "!$c").
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
                        CondCode cmp, string cmpstr> {
  // TODO support 5-operand format: p|q, a, b, c

  def rr
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
  def ri
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;

  def rr_and_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
  def ri_and_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
                                     RegPred:$c))]>;
  def rr_or_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
  def ri_or_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
  def rr_xor_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
  def ri_xor_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
                                     RegPred:$c))]>;

  def rr_and_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
                                     (not RegPred:$c)))]>;
  def ri_and_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
                                     (not RegPred:$c)))]>;
  def rr_or_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
                                    (not RegPred:$c)))]>;
  def ri_or_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
                                    (not RegPred:$c)))]>;
  def rr_xor_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
                                     (not RegPred:$c)))]>;
  def ri_xor_not_r
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
                                     (not RegPred:$c)))]>;
}

//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
// Floating-point compare into a predicate register. Every form exists twice:
// the *_u def matches the unordered condition code 'ucmp' and prints cmpstr
// with a trailing "u"; the *_o def matches the ordered code 'ocmp' and prints
// cmpstr unchanged. Only the plain compare has immediate (ri) variants; the
// combined and/or/xor forms are register-register only.
multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
                        CondCode ucmp, CondCode ocmp, string cmpstr> {
  // TODO support 5-operand format: p|q, a, b, c

  def rr_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
              !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
  def rr_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;

  def ri_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
              !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
  def ri_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
              !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
              [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;

  def rr_and_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.and.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
                                     RegPred:$c))]>;
  def rr_and_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
                                     RegPred:$c))]>;

  def rr_or_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.or.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
  def rr_or_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;

  def rr_xor_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.xor.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
                                     RegPred:$c))]>;
  def rr_xor_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, $c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
                                     RegPred:$c))]>;

  def rr_and_not_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.and.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
                                     (not RegPred:$c)))]>;
  def rr_and_not_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".and.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
                                     (not RegPred:$c)))]>;

  def rr_or_not_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.or.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
                                    (not RegPred:$c)))]>;
  def rr_or_not_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".or.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
                                    (not RegPred:$c)))]>;

  def rr_xor_not_r_u
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, "u.xor.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
                                     (not RegPred:$c)))]>;
  def rr_xor_not_r_o
    : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
              !strconcat("setp.", cmpstr, ".xor.", regclsname,
                         "\t$p, $a, $b, !$c"),
              [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
                                     (not RegPred:$c)))]>;
}

//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
// Predicate-controlled select. The DAG operand order is (pred, true, false),
// but the assembly string prints the predicate last: "$r, $b, $c, $a".
multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
                    SDNode immnode> {
  def rr
    : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
              !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
              [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
  def ri
    : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
              !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
              [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
  def ii
    : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
              !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
              [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
}



//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//

///===- Integer Arithmetic Instructions -----------------------------------===//

// DIV and REM map the unsigned DAG nodes (udiv/urem); signed division is a
// separate instantiation printed with .sNN suffixes.
defm ADD  : PTX_INT3<"add", add>;
defm SUB  : PTX_INT3<"sub", sub>;
defm MUL  : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
defm DIV  : PTX_INT3<"div", udiv>;
defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
defm REM  : PTX_INT3<"rem", urem>;

///===- Floating-Point Arithmetic Instructions ----------------------------===//

// FNEG
defm FNEG : PTX_FLOAT_2OP<"neg">;

// Standard Binary Operations
defm FADD : PTX_FLOAT_3OP<"add">;
defm FSUB : PTX_FLOAT_3OP<"sub">;
defm FMUL : PTX_FLOAT_3OP<"mul">;
defm FDIV : PTX_FLOAT_3OP<"div">;

// Multi-operation hybrid instructions
// Guarded by the SupportsFMA subtarget predicate.
defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;


///===- Floating-Point Intrinsic Instructions -----------------------------===//

// The intrinsic defs below share one shape: rounding-mode operand first,
// one register-or-immediate source, and an empty selection pattern.

// SQRT
def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
                        "sqrt$r.f32\t$d, $a", []>;
def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
                        "sqrt$r.f32\t$d, $a", []>;
def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
                        "sqrt$r.f64\t$d, $a", []>;
def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
                        "sqrt$r.f64\t$d, $a", []>;

// SIN
def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
                       "sin$r.f32\t$d, $a", []>;
def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
                       "sin$r.f32\t$d, $a", []>;
def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
                       "sin$r.f64\t$d, $a", []>;
def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
                       "sin$r.f64\t$d, $a", []>;

// COS
def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
                       "cos$r.f32\t$d, $a", []>;
def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
                       "cos$r.f32\t$d, $a", []>;
def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
                       "cos$r.f64\t$d, $a", []>;
def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
                       "cos$r.f64\t$d, $a", []>;




///===- Comparison and Selection Instructions -----------------------------===//

// .setp

// For each integer width: EQ/NE and the unsigned orderings are printed with
// a "uNN" type, the signed orderings (SETLT/SETLE/SETGT/SETGE) with "sNN".

// Compare u16

defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ,  "eq">;
defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE,  "ne">;
defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT,  "lt">;
defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE,  "le">;
defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT,  "gt">;
defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE,  "ge">;

// Compare u32

defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ,  "eq">;
defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE,  "ne">;
defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT,  "lt">;
defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE,  "le">;
defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT,  "gt">;
defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE,  "ge">;

// Compare u64

defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ,  "eq">;
defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE,  "ne">;
defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT,  "lt">;
defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE,  "le">;
defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT,  "gt">;
defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE,  "ge">;

// Each floating-point instantiation passes the unordered condition code
// first and the ordered one second.

// Compare f32

defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;

// Compare f64

defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;

// .selp

// Integer selects match 'imm' immediates, floating-point selects 'fpimm'.
defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;

///===- Logic and Shift Instructions --------------------------------------===//

// The opcode strings end in the type letter; PTX_INT3ntnc appends the width.
defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;

defm AND : PTX_LOGIC<"and", and>;
defm OR  : PTX_LOGIC<"or",  or>;
defm XOR : PTX_LOGIC<"xor", xor>;

///===- Data Movement and Conversion Instructions -------------------------===//

// any_extend
// Implement the anyext instruction in terms of the PTX cvt instructions.
//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;

// bitconvert
// These instructions implement the bit-wise conversion between integer and
// floating-point types.
+def MOVi32f32 + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; +def MOVf32i32 + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; +def MOVi64f64 + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; +def MOVf64i64 + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; + +let neverHasSideEffects = 1 in { + def MOVPREDrr + : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; + def MOVU16rr + : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; + def MOVU32rr + : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; + def MOVU64rr + : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; + def MOVF32rr + : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; + def MOVF64rr + : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVPREDri + : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set RegPred:$d, imm:$a)]>; + def MOVU16ri + : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RegI16:$d, imm:$a)]>; + def MOVU32ri + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, imm:$a)]>; + def MOVU64ri + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, imm:$a)]>; + def MOVF32ri + : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RegF32:$d, fpimm:$a)]>; + def MOVF64ri + : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RegF64:$d, fpimm:$a)]>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVaddr32 + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVaddr64 + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVframe32 + : 
InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>; + def MOVframe64 + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>; +} + +// PTX cvt instructions +// Not all of these may actually be used; we just define all possible patterns +// here (that make sense). +// FIXME: Can we collapse this somehow into a multiclass def? + +// To i16 +def CVTu16u32 + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; +def CVTu16u64 + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; +def CVTu16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u16.f32\t$d, $a", []>; +def CVTs16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s16.f32\t$d, $a", []>; +def CVTu16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u16.f64\t$d, $a", []>; +def CVTs16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s16.f64\t$d, $a", []>; + +// To i32 +def CVTu32u16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; +def CVTs32s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; +def CVTu32u64 + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; +def CVTu32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u32.f32\t$d, $a", []>; +def CVTs32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s32.f32\t$d, $a", []>; +def CVTu32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u32.f64\t$d, $a", []>; +def CVTs32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s32.f64\t$d, $a", []>; + +// To i64 +def CVTu64u16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; +def CVTs64s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; +def 
CVTu64u32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; +def CVTs64s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; +def CVTu64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u64.f32\t$d, $a", []>; +def CVTs64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s64.f32\t$d, $a", []>; +def CVTu64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u64.f64\t$d, $a", []>; +def CVTs64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s64.f64\t$d, $a", []>; + +// To f32 +def CVTf32u16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.u16\t$d, $a", []>; +def CVTf32s16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.s16\t$d, $a", []>; +def CVTf32u32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.u32\t$d, $a", []>; +def CVTf32s32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.s32\t$d, $a", []>; +def CVTf32u64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.u64\t$d, $a", []>; +def CVTf32s64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.s64\t$d, $a", []>; +def CVTf32f64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.f32.f64\t$d, $a", []>; + +// To f64 +def CVTf64u16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.u16\t$d, $a", []>; +def CVTf64s16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.s16\t$d, $a", []>; +def CVTf64u32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.u32\t$d, $a", []>; +def CVTf64s32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.s32\t$d, $a", []>; +def CVTf64u64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.u64\t$d, $a", []>; +def CVTf64s64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.s64\t$d, $a", 
[]>; +def CVTf64f32 + : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; + + ///===- Control Flow Instructions -----------------------------------------===// + +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} + +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a RegPred register) here + // When this is revisited, make sure to also look at LowerSETCC and try to + // fold it into negated predicates, if possible. + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; +} + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} + +let hasSideEffects = 1 in { + def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>; +} + +///===- Parameter Passing Pseudo-Instructions -----------------------------===// + +def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), + "mov.pred\t$a, %arg$b", []>; +def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), + "mov.b16\t$a, %arg$b", []>; +def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), + "mov.b32\t$a, %arg$b", []>; +def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), + "mov.b64\t$a, %arg$b", []>; +def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), + "mov.f32\t$a, %arg$b", []>; +def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), + "mov.f64\t$a, %arg$b", []>; + +def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; +def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; +def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>; +def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; +def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; +def 
WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; + + +//===----------------------------------------------------------------------===// +// Instruction Selection Patterns +//===----------------------------------------------------------------------===// + +// FADD +def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)), + (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)), + (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)), + (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)), + (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FSUB +def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)), + (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)), + (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)), + (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)), + (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL +def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)), + (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)), + (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)), + (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)), + (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FDIV +def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)), + (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)), + (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)), + (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)), + (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL+FADD +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)), + (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>, + Requires<[SupportsFMA]>; +def : Pat<(f32 (fadd (fmul 
RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>, + Requires<[SupportsFMA]>; +// NOTE(review): this pattern passes two fpimm operands to FMADrrr32, the same +// instruction the all-register pattern selects -- confirm the intended variant. +def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)), + (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>, + Requires<[SupportsFMA]>; +// NOTE(review): this source pattern and its result are identical to the +// (fmul reg, reg) + fpimm pattern just above; it looks redundant -- confirm. +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>, + Requires<[SupportsFMA]>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)), + (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>, + Requires<[SupportsFMA]>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>, + Requires<[SupportsFMA]>; +// NOTE(review): this pattern passes fpimm:$b to FMADrri64, which the pattern +// above uses with a RegF64 second operand -- confirm the intended variant. +def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>, + Requires<[SupportsFMA]>; + +// FNEG +def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>; + +// FSQRT +def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>; + +// FSIN +def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>; + +// FCOS +def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 
RndDefault, fpimm:$a)>; + +// Type conversion notes: +// - PTX does not directly support converting a predicate to a value, so we +// use a select instruction to select either 0 or 1 (integer or fp) based +// on the truth value of the predicate. +// - PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. + +// Conversion to pred +def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>; +def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>; +def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>; +def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>; +def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>; + +// Conversion to u16 +def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; +def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; +def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; +def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; +def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; + +// Conversion to u32 +def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>; +def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; +def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; +def : 
Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; + +// Conversion to u64 +def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a, + 0xFFFFFFFFFFFFFFFF, 0)>; +def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>; +def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; +def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; + +// Conversion to f32 +def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a, + (MOVf32i32 0x3F800000), (MOVf32i32 0))>; +def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>; +def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>; +def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>; +def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, 
RegF64:$a)>; +def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>; + +// Conversion to f64 +// A predicate converts to f64 by selecting between the bit patterns of 1.0 +// and 0.0. 0x3FF0000000000000 is the IEEE-754 binary64 encoding of 1.0 +// (zero-extending the binary32 value 0x3F800000 would encode 2^-7 instead). +def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a, + (MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>; +def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>; +def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>; +def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; +def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; +def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; + +// setcc - predicate inversion for branch conditions +def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)), + (XORripreds RegPred:$a, imm:$b)>; + +///===- Intrinsic Instructions --------------------------------------------===// +include "PTXIntrinsicInstrInfo.td" + +///===- Load/Store Instructions -------------------------------------------===// +include "PTXInstrLoadStore.td" + diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td new file mode 100644 index 0000000..7a62684 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td @@ -0,0 +1,278 @@ +//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX load/store instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Addressing Predicates +// We have to differentiate between 32- and 64-bit pointer types +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +//===----------------------------------------------------------------------===// +// Pattern Fragments for Loads/Stores +//===----------------------------------------------------------------------===// + +def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Constant; + return false; +}]>; + +def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; + return false; +}]>; + +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; 
+ return false; +}]>; + +// Addressing modes. +def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; +def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>; +def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>; + +// Address operands +def MEMri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI32, i32imm); +} +def MEMri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI64, i64imm); +} +def LOCALri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def LOCALri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +def MEMii32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def MEMii64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. 
+def MEMpi : Operand<i32> { + let PrintMethod = "printParamOperand"; + let MIOperandInfo = (ops i32imm); +} +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} + + +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + +def PTXreadparam + : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXwriteparam + : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + + + +//===----------------------------------------------------------------------===// +// Classes for loads/stores +//===----------------------------------------------------------------------===// +multiclass PTX_LD<string opstr, string typestr, + RegisterClass RC, PatFrag pat_load> { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, + 
Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, + PatFrag pat_store> { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, + Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { + def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal32:$a))]>; + def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal64:$a))]>; + def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal32:$a)]>; + def STri64 : InstPTX<(outs), 
(ins RC:$d, LOCALri64:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal64:$a)]>; +} + +multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> { + let hasSideEffects = 1 in { + def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a), + !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (PTXloadparam texternalsym:$a))]>; + def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a), + !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")), + [(PTXstoreparam texternalsym:$d, RC:$a)]>; + } +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; +} + + + +//===----------------------------------------------------------------------===// +// Instruction definitions for loads/stores +//===----------------------------------------------------------------------===// + +// Global/shared stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// Global/shared/constant loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// Param loads/stores +defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>; +defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>; +defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>; +defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>; +defm 
PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>; +defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>; + +// Local loads/stores +defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; +defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; +defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; +defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; +defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; +defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td new file mode 100644 index 0000000..3416f1c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -0,0 +1,110 @@ +//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsic instructions. 
//
//===----------------------------------------------------------------------===//

// PTX Special Purpose Register Accessor Intrinsics
//
// Each accessor is an instruction with no inputs that moves one PTX special
// register into a general register; its pattern selects it directly from the
// intrinsic passed as 'intop'.

// 64-bit accessor: prints "mov.u64 $d, %<regname>" and defines a RegI64.
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : InstPTX<(outs RegI64:$d), (ins),
            !strconcat("mov.u64\t$d, %", regname),
            [(set RegI64:$d, (intop))]>;

// 32-bit accessor: prints "mov.u32 $d, %<regname>" and defines a RegI32.
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : InstPTX<(outs RegI32:$d), (ins),
            !strconcat("mov.u32\t$d, %", regname),
            [(set RegI32:$d, (intop))]>;

// TODO Add read vector-version of special registers

// %tid components.
//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
//                                                     int_ptx_read_tid_r64>;
def PTX_READ_TID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
def PTX_READ_TID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
def PTX_READ_TID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
def PTX_READ_TID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;

// %ntid components.
//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
//                                                      int_ptx_read_ntid_r64>;
def PTX_READ_NTID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
def PTX_READ_NTID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;

// %laneid, %warpid, %nwarpid.
def PTX_READ_LANEID
  : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
def PTX_READ_WARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
def PTX_READ_NWARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;

// %ctaid components.
//def PTX_READ_CTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
def PTX_READ_CTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;

// %nctaid components.
//def PTX_READ_NCTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
def PTX_READ_NCTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;

// %smid, %nsmid, %gridid.
def PTX_READ_SMID
  : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
def PTX_READ_NSMID
  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
def PTX_READ_GRIDID
  : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;

// %lanemask_* registers.
def PTX_READ_LANEMASK_EQ
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;

// %clock is read as 32 bits, %clock64 as 64 bits.
def PTX_READ_CLOCK
  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;

// %pm0 .. %pm3.
def PTX_READ_PM0
  : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1
  : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2
  : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3
  : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;

// PTX Parallel Synchronization and Communication Intrinsics

def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i),
"bar.sync\t$i", + [(int_ptx_bar_sync imm:$i)]>; diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp new file mode 100644 index 0000000..3ed67a6 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -0,0 +1,556 @@ +//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/PathV2.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class PTXMCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr<MCInstPrinter> InstPrinter; + OwningPtr<MCCodeEmitter> Emitter; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + +public: + PTXMCAsmStreamer(MCContext &Context, + formatted_raw_ostream &os, + bool isVerboseAsm, bool useLoc, + MCInstPrinter *printer, + MCCodeEmitter *emitter, + bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), + IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if (InstPrinter && IsVerboseAsm) + 
InstPrinter->setCommentStream(CommentStream); + } + + ~PTXMCAsmStreamer() {} + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. + virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. + return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. 
+ virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void ChangeSection(const MCSection *Section); + virtual void InitSections() { /* PTX does not use sections */ } + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + + virtual void EmitThumbFunc(MCSymbol *Func); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label, + unsigned PointerSize); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. + /// @param ByteAlignment - The alignment of the common symbol in bytes. 
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value); + virtual void EmitSLEB128Value(const MCExpr *Value); + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual bool EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, + StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by an assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void FinishImpl(); + + /// @} + +}; // class PTXMCAsmStreamer + +} + +/// TODO: Add appropriate implementation of Emit*() methods when needed + +void PTXMCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. 
+ CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +void PTXMCAsmStreamer::EmitCommentsAndEOL() { + if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { + OS << '\n'; + return; + } + + CommentStream.flush(); + StringRef Comments = CommentToEmit.str(); + + assert(Comments.back() == '\n' && + "Comment array not newline terminated"); + do { + // Emit a line of comments. + OS.PadToColumn(MAI.getCommentColumn()); + size_t Position = Comments.find('\n'); + OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; + + Comments = Comments.substr(Position+1); + } while (!Comments.empty()); + + CommentToEmit.clear(); + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { + assert(Bytes && "Invalid size!"); + return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); +} + +void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); +} + +void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + + OS << *Symbol << MAI.getLabelSuffix(); + EmitEOL(); + Symbol->setSection(*getCurrentSection()); +} + +void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} + +void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {} + +void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + OS << *Symbol << " = " << *Value; + EmitEOL(); + + // FIXME: Lift context changes into super class. 
+ Symbol->setVariableValue(Value); +} + +void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, + const MCSymbol *Symbol) { + OS << ".weakref " << *Alias << ", " << *Symbol; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label, + unsigned PointerSize) { + report_fatal_error("Unimplemented."); +} + +void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) {} + +void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + +void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {} + +void PTXMCAsmStreamer::EndCOFFSymbolDef() {} + +void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + +void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, + MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS 
<< toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + +void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. 
+ if (Directive) break; + int64_t IntValue; + if (!Value->EvaluateAsAbsolute(IntValue)) + report_fatal_error("Don't know how to emit this value."); + if (getContext().getAsmInfo().isLittleEndian()) { + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { + assert(MAI.hasLEB128() && "Cannot print a .uleb"); + OS << ".uleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { + assert(MAI.hasLEB128() && "Cannot print a .sleb"); + OS << ".sleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. 
+ if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} + +bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) {return false;} + + +void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +// FIXME: should we inherit from MCAsmStreamer? 
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Directory, + StringRef Filename) { + if (!Directory.empty()) { + if (sys::path::is_absolute(Filename)) + return EmitDwarfFileDirective(FileNo, "", Filename); + SmallString<128> FullPathName = Directory; + sys::path::append(FullPathName, Filename); + return EmitDwarfFileDirective(FileNo, "", FullPathName); + } + + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); + return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename); +} + +void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {} + +void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS, ""); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. 
+void PTXMCAsmStreamer::EmitRawText(StringRef String) { + if (!String.empty() && String.back() == '\n') + String = String.substr(0, String.size()-1); + OS << String; + EmitEOL(); +} + +void PTXMCAsmStreamer::FinishImpl() {} + +namespace llvm { + MCStreamer *createPTXAsmStreamer(MCContext &Context, + formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + bool useDwarfDirectory, + MCInstPrinter *IP, + MCCodeEmitter *CE, MCAsmBackend *MAB, + bool ShowInst) { + return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, + IP, CE, ShowInst); + } +} diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp new file mode 100644 index 0000000..142e639 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp @@ -0,0 +1,32 @@ +//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower PTX MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXAsmPrinter.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp; + OutMI.addOperand(AP.lowerOperand(MO)); + } +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp new file mode 100644 index 0000000..172a0e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -0,0 +1,85 @@ +//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an information extractor for PTX machine functions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-mf-info-extract" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "PTXMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// NOTE: PTXMFInfoExtract must after register allocation! 
+ +namespace { + /// PTXMFInfoExtract - PTX specific code to extract of PTX machine + /// function information for PTXAsmPrinter + /// + class PTXMFInfoExtract : public MachineFunctionPass { + private: + static char ID; + + public: + PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX Machine Function Info Extractor"; + } + }; // class PTXMFInfoExtract +} // end anonymous namespace + +using namespace llvm; + +char PTXMFInfoExtract::ID = 0; + +bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Generate list of all virtual registers used in this function + for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + unsigned RegType; + if (TRC == PTX::RegPredRegisterClass) + RegType = PTXRegisterType::Pred; + else if (TRC == PTX::RegI16RegisterClass) + RegType = PTXRegisterType::B16; + else if (TRC == PTX::RegI32RegisterClass) + RegType = PTXRegisterType::B32; + else if (TRC == PTX::RegI64RegisterClass) + RegType = PTXRegisterType::B64; + else if (TRC == PTX::RegF32RegisterClass) + RegType = PTXRegisterType::F32; + else if (TRC == PTX::RegF64RegisterClass) + RegType = PTXRegisterType::F64; + else + llvm_unreachable("Unkown register class."); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg); + } + + return false; +} + +FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXMFInfoExtract(TM, OptLevel); +} diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp new file mode 100644 index 0000000..60acfc7 --- /dev/null +++ 
b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- PTXMachineFuctionInfo.cpp - PTX machine function info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PTXMachineFunctionInfo.h" + +using namespace llvm; + +void PTXMachineFunctionInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h new file mode 100644 index 0000000..bb7574c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -0,0 +1,202 @@ +//===-- PTXMachineFuctionInfo.h - PTX machine function info ------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PTX-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_MACHINE_FUNCTION_INFO_H +#define PTX_MACHINE_FUNCTION_INFO_H + +#include "PTX.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// PTXMachineFunctionInfo - This class is derived from MachineFunction and +/// contains private PTX target-specific information for each MachineFunction. 
+/// +class PTXMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + bool IsKernel; + DenseSet<unsigned> RegArgs; + DenseSet<unsigned> RegRets; + + typedef DenseMap<int, std::string> FrameMap; + + FrameMap FrameSymbols; + + struct RegisterInfo { + unsigned Reg; + unsigned Type; + unsigned Space; + unsigned Offset; + unsigned Encoded; + }; + + typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap; + + RegisterInfoMap RegInfo; + + PTXParamManager ParamManager; + +public: + typedef DenseSet<unsigned>::const_iterator reg_iterator; + + PTXMachineFunctionInfo(MachineFunction &MF) + : IsKernel(false) { + } + + /// getParamManager - Returns the PTXParamManager instance for this function. + PTXParamManager& getParamManager() { return ParamManager; } + const PTXParamManager& getParamManager() const { return ParamManager; } + + /// setKernel/isKernel - Gets/sets a flag that indicates if this function is + /// a PTX kernel function. + void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; } + bool isKernel() const { return IsKernel; } + + /// argreg_begin/argreg_end - Returns iterators to the set of registers + /// containing function arguments. + reg_iterator argreg_begin() const { return RegArgs.begin(); } + reg_iterator argreg_end() const { return RegArgs.end(); } + + /// retreg_begin/retreg_end - Returns iterators to the set of registers + /// containing the function return values. 
+ reg_iterator retreg_begin() const { return RegRets.begin(); } + reg_iterator retreg_end() const { return RegRets.end(); } + + /// addRegister - Adds a virtual register to the set of all used registers + void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) { + if (!RegInfo.count(Reg)) { + RegisterInfo Info; + Info.Reg = Reg; + Info.Type = RegType; + Info.Space = RegSpace; + + // Determine register offset + Info.Offset = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), + e = RegInfo.end(); i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Space == RegSpace) + if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type) + Info.Offset++; + } + + // Encode the register data into a single register number + Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space; + + RegInfo[Reg] = Info; + + if (RegSpace == PTXRegisterSpace::Argument) + RegArgs.insert(Reg); + else if (RegSpace == PTXRegisterSpace::Return) + RegRets.insert(Reg); + } + } + + /// countRegisters - Returns the number of registers of the given type and + /// space. + unsigned countRegisters(unsigned RegType, unsigned RegSpace) const { + unsigned Count = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end(); + i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Type == RegType && RI.Space == RegSpace) + Count++; + } + return Count; + } + + /// getEncodedRegister - Returns the encoded value of the register. + unsigned getEncodedRegister(unsigned Reg) const { + return RegInfo.lookup(Reg).Encoded; + } + + /// addRetReg - Adds a register to the set of return-value registers. + void addRetReg(unsigned Reg) { + if (!RegRets.count(Reg)) { + RegRets.insert(Reg); + } + } + + /// addArgReg - Adds a register to the set of function argument registers. + void addArgReg(unsigned Reg) { + RegArgs.insert(Reg); + } + + /// getRegisterName - Returns the name of the specified virtual register. This + /// name is used during PTX emission. 
+ std::string getRegisterName(unsigned Reg) const { + if (RegInfo.count(Reg)) { + const RegisterInfo& RI = RegInfo.lookup(Reg); + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, RI.Encoded); + NameStr.flush(); + return Name; + } + else if (Reg == PTX::NoRegister) + return "%noreg"; + else + llvm_unreachable("Register not in register name map"); + } + + /// getEncodedRegisterName - Returns the name of the encoded register. + std::string getEncodedRegisterName(unsigned EncodedReg) const { + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, EncodedReg); + NameStr.flush(); + return Name; + } + + /// getRegisterType - Returns the type of the specified virtual register. + unsigned getRegisterType(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Type; + else + llvm_unreachable("Unknown register"); + } + + /// getOffsetForRegister - Returns the offset of the virtual register + unsigned getOffsetForRegister(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Offset; + else + return 0; + } + + /// getFrameSymbol - Returns the symbol name for the given FrameIndex. + const char* getFrameSymbol(int FrameIndex) { + if (FrameSymbols.count(FrameIndex)) { + return FrameSymbols.lookup(FrameIndex).c_str(); + } else { + std::string Name = "__local"; + Name += utostr(FrameIndex); + // The whole point of caching this name is to ensure the pointer we pass + // to any getExternalSymbol() calls will remain valid for the lifetime of + // the back-end instance. This is to work around an issue in SelectionDAG + // where symbol names are expected to be life-long strings. 
+ FrameSymbols[FrameIndex] = Name; + return FrameSymbols[FrameIndex].c_str(); + } + } +}; // class PTXMachineFunctionInfo +} // namespace llvm + +#endif // PTX_MACHINE_FUNCTION_INFO_H diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp new file mode 100644 index 0000000..cc1cc71 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp @@ -0,0 +1,73 @@ +//===-- PTXParamManager.cpp - Manager for .param variables ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXParamManager class. +// +//===----------------------------------------------------------------------===// + +#include "PTXParamManager.h" +#include "PTX.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +PTXParamManager::PTXParamManager() { +} + +unsigned PTXParamManager::addArgumentParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_ARGUMENT; + Param.Size = Size; + + std::string Name; + Name = "__param_"; + Name += utostr(ArgumentParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ArgumentParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addReturnParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_RETURN; + Param.Size = Size; + + std::string Name; + Name = "__ret_"; + Name += utostr(ReturnParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ReturnParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addLocalParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_LOCAL; + Param.Size = Size; + + std::string Name; + Name = "__localparam_"; + Name += 
utostr(LocalParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + LocalParams.push_back(Index); + + return Index; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h new file mode 100644 index 0000000..92e7728 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h @@ -0,0 +1,87 @@ +//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTXParamManager class, which manages all defined .param +// variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_PARAM_MANAGER_H +#define PTX_PARAM_MANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include <string> + +namespace llvm { + +/// PTXParamManager - This class manages all .param variables defined for a +/// particular function. 
+class PTXParamManager {
+private:
+
+  /// PTXParamType - Type of a .param variable
+  enum PTXParamType {
+    PTX_PARAM_TYPE_ARGUMENT,
+    PTX_PARAM_TYPE_RETURN,
+    PTX_PARAM_TYPE_LOCAL
+  };
+
+  /// PTXParam - Definition of a PTX .param variable
+  struct PTXParam {
+    PTXParamType Type;
+    unsigned Size;
+    std::string Name;
+  };
+
+  DenseMap<unsigned, PTXParam> AllParams;  // every param, keyed by its index
+  SmallVector<unsigned, 4> ArgumentParams;  // AllParams keys of argument params
+  SmallVector<unsigned, 4> ReturnParams;  // AllParams keys of return params
+  SmallVector<unsigned, 4> LocalParams;  // AllParams keys of local params
+
+public:
+
+  typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+  PTXParamManager();
+
+  param_iterator arg_begin() const { return ArgumentParams.begin(); }
+  param_iterator arg_end() const { return ArgumentParams.end(); }
+  param_iterator ret_begin() const { return ReturnParams.begin(); }
+  param_iterator ret_end() const { return ReturnParams.end(); }
+  param_iterator local_begin() const { return LocalParams.begin(); }
+  param_iterator local_end() const { return LocalParams.end(); }
+
+  /// addArgumentParam - Creates a new argument .param and returns its index.
+  unsigned addArgumentParam(unsigned Size);
+
+  /// addReturnParam - Creates a new return .param and returns its index.
+  unsigned addReturnParam(unsigned Size);
+
+  /// addLocalParam - Creates a new local .param and returns its index.
+  unsigned addLocalParam(unsigned Size);
+
+  /// getParamName - Returns the name of the parameter as a string.
+  const std::string &getParamName(unsigned Param) const {
+    assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+    return AllParams.find(Param)->second.Name;
+  }
+
+  /// getParamSize - Returns the size of the parameter in bits.
+  unsigned getParamSize(unsigned Param) const {
+    assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+    return AllParams.find(Param)->second.Size;
+  }
+
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..7fd5375
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,53 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+  // Special register allocator for PTX; currently a pass that changes nothing.
+  class PTXRegAlloc : public MachineFunctionPass {
+  public:
+    static char ID;
+    PTXRegAlloc() : MachineFunctionPass(ID) {}
+
+    virtual const char* getPassName() const {
+      return "PTX Register Allocator";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      // We do not actually do anything (at least not yet).
+      return false;
+    }
+  };
+
+  char PTXRegAlloc::ID = 0;
+
+  static RegisterRegAlloc
+    ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+  return new PTXRegAlloc();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 0000000..b6ffd38
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,38 @@
+//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXRegisterInfo.h"
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+                                 const TargetInstrInfo &tii)
+  // PTX does not have a return address register, hence the 0 passed below.
+  : PTXGenRegisterInfo(0), TII(tii) {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
+                                          int /*SPAdj*/,
+                                          RegScavenger * /*RS*/) const {
+  llvm_unreachable("FrameIndex should have been previously eliminated!");
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 0000000..5614ce7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,56 @@
+//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+  const TargetInstrInfo &TII;
+
+public:
+  PTXRegisterInfo(PTXTargetMachine &TM,
+                  const TargetInstrInfo &tii);
+
+  virtual const uint16_t
+    *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+    static const uint16_t CalleeSavedRegs[] = { 0 };
+    return CalleeSavedRegs; // save nothing: the list holds only the 0 entry
+  }
+
+  virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+    BitVector Reserved(getNumRegs());
+    return Reserved; // reserve no regs: no bit is ever set
+  }
+
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                   int SPAdj,
+                                   RegScavenger *RS = NULL) const;
+
+  virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+    llvm_unreachable("PTX does not have a frame register");
+  }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 0000000..e8b262e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,36 @@
+//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+  let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// The generated register info code throws warnings for empty register classes
+// (e.g. zero-length arrays), so we use a dummy register here just to prevent
+// these warnings.
+def DUMMY_REG : PTXReg<"R0">;  // placeholder so the classes below are not empty
+
+//===----------------------------------------------------------------------===//
+// Register classes (one per value type; DUMMY_REG is the only member of each)
+//===----------------------------------------------------------------------===//
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a116fab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,150 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + // Always inline memcpys. In PTX, we do not have a C library that provides + // a memcpy function. + //if (!AlwaysInline) + // return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. 
+ while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), isVolatile, + false, false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), + isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), false, false, + false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: 
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + llvm_unreachable("memset lowering not implemented for PTX yet"); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h new file mode 100644 index 0000000..e0c7167 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h @@ -0,0 +1,53 @@ +//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTX subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXSELECTIONDAGINFO_H +#define PTXSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. +/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. +class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const PTXSubtarget *Subtarget; + +public: + explicit PTXSelectionDAGInfo(const TargetMachine &TM); + ~PTXSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const; +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp new file mode 100644 index 0000000..454f64e --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp @@ -0,0 +1,68 @@ +//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTX specific subclass of TargetSubtargetInfo. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+#include "PTX.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void PTXSubtarget::anchor() { }
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS, bool is64Bit)
+  : PTXGenSubtargetInfo(TT, CPU, FS),
+    PTXTarget(PTX_COMPUTE_1_0),
+    PTXVersion(PTX_VERSION_2_0),
+    SupportsDouble(false),
+    SupportsFMA(true),
+    Is64Bit(is64Bit) {
+  std::string TARGET = CPU;
+  if (TARGET.empty())
+    TARGET = "generic";  // fall back to the "generic" CPU name when none given
+  ParseSubtargetFeatures(TARGET, FS);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+  switch(PTXTarget) {
+    default: llvm_unreachable("Unknown PTX target");  // incl. PTX_LAST_* values
+    case PTX_SM_1_0: return "sm_10";
+    case PTX_SM_1_1: return "sm_11";
+    case PTX_SM_1_2: return "sm_12";
+    case PTX_SM_1_3: return "sm_13";
+    case PTX_SM_2_0: return "sm_20";
+    case PTX_SM_2_1: return "sm_21";
+    case PTX_SM_2_2: return "sm_22";
+    case PTX_SM_2_3: return "sm_23";
+    case PTX_COMPUTE_1_0: return "compute_10";
+    case PTX_COMPUTE_1_1: return "compute_11";
+    case PTX_COMPUTE_1_2: return "compute_12";
+    case PTX_COMPUTE_1_3: return "compute_13";
+    case PTX_COMPUTE_2_0: return "compute_20";
+  }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+  switch(PTXVersion) {
+    case PTX_VERSION_2_0: return "2.0";
+    case PTX_VERSION_2_1: return "2.1";
+    case PTX_VERSION_2_2: return "2.2";
+    case PTX_VERSION_2_3: return "2.3";
+  }
+  llvm_unreachable("Invalid PTX version");
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 0000000..ce93fef
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,131 @@
+//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+  class PTXSubtarget : public PTXGenSubtargetInfo {
+    virtual void anchor();
+    public:
+
+      /**
+       * Enumeration of the compute and shader-model targets of the back-end.
+       */
+      enum PTXTargetEnum {
+        PTX_COMPUTE_1_0, /**< Compute Compatibility 1.0 */
+        PTX_COMPUTE_1_1, /**< Compute Compatibility 1.1 */
+        PTX_COMPUTE_1_2, /**< Compute Compatibility 1.2 */
+        PTX_COMPUTE_1_3, /**< Compute Compatibility 1.3 */
+        PTX_COMPUTE_2_0, /**< Compute Compatibility 2.0 */
+        PTX_LAST_COMPUTE,  // sentinel: upper bound for the compute_* checks
+
+        PTX_SM_1_0, /**< Shader Model 1.0 */
+        PTX_SM_1_1, /**< Shader Model 1.1 */
+        PTX_SM_1_2, /**< Shader Model 1.2 */
+        PTX_SM_1_3, /**< Shader Model 1.3 */
+        PTX_SM_2_0, /**< Shader Model 2.0 */
+        PTX_SM_2_1, /**< Shader Model 2.1 */
+        PTX_SM_2_2, /**< Shader Model 2.2 */
+        PTX_SM_2_3, /**< Shader Model 2.3 */
+        PTX_LAST_SM  // sentinel: upper bound for the sm_* checks
+      };
+
+      /**
+       * Enumeration of PTX versions supported by the back-end.
+       *
+       * Currently, PTX 2.0 is the minimum supported version.
+       */
+      enum PTXVersionEnum {
+        PTX_VERSION_2_0, /**< PTX Version 2.0 */
+        PTX_VERSION_2_1, /**< PTX Version 2.1 */
+        PTX_VERSION_2_2, /**< PTX Version 2.2 */
+        PTX_VERSION_2_3 /**< PTX Version 2.3 */
+      };
+
+    private:
+
+      /// Shader Model supported on the target GPU.
+      PTXTargetEnum PTXTarget;
+
+      /// PTX Language Version.
+      PTXVersionEnum PTXVersion;
+
+      // The native .f64 type is supported on the hardware.
+      bool SupportsDouble;
+
+      // Support the fused-multiply add (FMA) and multiply-add (MAD)
+      // instructions
+      bool SupportsFMA;
+
+      // Use .u64 instead of .u32 for addresses.
+      bool Is64Bit;
+
+    public:
+
+      PTXSubtarget(const std::string &TT, const std::string &CPU,
+                   const std::string &FS, bool is64Bit);
+
+      // Target architecture accessors
+      std::string getTargetString() const;
+
+      std::string getPTXVersionString() const;
+
+      bool supportsDouble() const { return SupportsDouble; }
+
+      bool is64Bit() const { return Is64Bit; }
+
+      bool supportsFMA() const { return SupportsFMA; }
+
+      bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+      bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
+      bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
+
+      bool fdivNeedsRoundingMode() const {  // true for sm_13+ and compute_13+
+        return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+               (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+      }
+
+      bool fmadNeedsRoundingMode() const {  // same condition as fdiv above
+        return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+               (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+      }
+
+      bool useParamSpaceForDeviceArgs() const {  // true for sm_20+/compute_20
+        return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+               (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+      }
+
+      bool callsAreHandled() const {  // same condition as the one above
+        return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+               (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+      }
+
+      bool emitPtrAttribute() const {
+        return PTXVersion >= PTX_VERSION_2_2;
+      }
+
+      void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+  }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 0000000..97b8de1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,165 @@
+//===-- PTXTargetMachine.cpp
- Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXTargetMachine.h"
+#include "PTX.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+  MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+                                   bool isVerboseAsm, bool useLoc,
+                                   bool useCFI, bool useDwarfDirectory,
+                                   MCInstPrinter *InstPrint,
+                                   MCCodeEmitter *CE,
+                                   MCAsmBackend *MAB,
+                                   bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+
+  RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+  RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+  TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+  TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+  const char* DataLayout32 =
+    "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+  const char* DataLayout64 =  // differs from DataLayout32 only in the p: entry
+    "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+                                   StringRef TT, StringRef CPU, StringRef FS,
+                                   const TargetOptions &Options,
+                                   Reloc::Model RM, CodeModel::Model CM,
+                                   CodeGenOpt::Level OL,
+                                   bool is64Bit)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+    Subtarget(TT, CPU, FS, is64Bit),
+    FrameLowering(Subtarget),
+    InstrInfo(*this),
+    TSInfo(*this),
+    TLInfo(*this) {
+}
+
+void PTX32TargetMachine::anchor() { }
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+  : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {  // 32-bit
+}
+
+void PTX64TargetMachine::anchor() { }
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+  : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {  // 64-bit
+}
+
+namespace llvm {
+/// PTX Code Generator Pass Configuration Options.
+class PTXPassConfig : public TargetPassConfig {
+public:
+  PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  PTXTargetMachine &getPTXTargetMachine() const {
+    return getTM<PTXTargetMachine>();
+  }
+
+  bool addInstSelector();
+  FunctionPass *createTargetRegisterAllocator(bool);
+  void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+  bool addPostRegAlloc();
+  void addMachineLateOptimization();
+  bool addPreEmitPass();
+};
+} // namespace llvm
+
+TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+  PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
+  PassConfig->disablePass(PrologEpilogCodeInserterID);
+  return PassConfig;
+}
+
+bool PTXPassConfig::addInstSelector() {
+  PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
+  return false;
+}
+
+FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
+  return createPTXRegisterAllocator();  // same allocator for both settings
+}
+
+// Modify the optimized compilation path to bypass optimized register allocation.
+void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+  addFastRegAlloc(RegAllocPass);
+}
+
+bool PTXPassConfig::addPostRegAlloc() {
+  // PTXMFInfoExtract must run after register allocation! (added in PreEmit)
+  //PM->add(createPTXMFInfoExtract(getPTXTargetMachine()));
+  return false;
+}
+
+/// Add passes that optimize machine instructions after register allocation.
+void PTXPassConfig::addMachineLateOptimization() {
+  if (addPass(BranchFolderPassID) != &NoPassID)  // verify only if pass was added
+    printAndVerify("After BranchFolding");
+
+  if (addPass(TailDuplicateID) != &NoPassID)  // verify only if pass was added
+    printAndVerify("After TailDuplicate");
+}
+
+bool PTXPassConfig::addPreEmitPass() {
+  PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
+  PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
+  return true;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 0000000..278d155
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,104 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+  private:
+    const TargetData DataLayout;
+    PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+    PTXFrameLowering FrameLowering;
+    PTXInstrInfo InstrInfo;
+    PTXSelectionDAGInfo TSInfo;
+    PTXTargetLowering TLInfo;
+
+  public:
+    PTXTargetMachine(const Target &T, StringRef TT,
+                     StringRef CPU, StringRef FS, const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OL,
+                     bool is64Bit);
+
+    virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+    virtual const TargetFrameLowering *getFrameLowering() const {
+      return &FrameLowering;
+    }
+
+    virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+    virtual const TargetRegisterInfo *getRegisterInfo() const {
+      return &InstrInfo.getRegisterInfo(); }
+
+    virtual const PTXTargetLowering *getTargetLowering() const {
+      return &TLInfo; }
+
+    virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+      return &TSInfo;
+    }
+
+    virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+    // Emission of machine code through JITCodeEmitter is not supported.
+    virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+                                            JITCodeEmitter &,
+                                            bool = true) {
+      return true;  // adds no passes; always returns true
+    }
+
+    // Emission of machine code through MCJIT is not supported.
+    virtual bool addPassesToEmitMC(PassManagerBase &,
+                                   MCContext *&,
+                                   raw_ostream &,
+                                   bool = true) {
+      return true;  // adds no passes; always returns true
+    }
+
+    // Pass Pipeline Configuration
+    virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+  virtual void anchor();
+public:
+
+  PTX32TargetMachine(const Target &T, StringRef TT,
+                     StringRef CPU, StringRef FS, const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OL);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+  virtual void anchor();
+public:
+
+  PTX64TargetMachine(const Target &T, StringRef TT,
+                     StringRef CPU, StringRef FS, const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OL);
+}; // class PTX64TargetMachine
+
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 0000000..09a2735
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+  // Triple::ptx32 / Triple::ptx64 are declared in llvm/ADT/Triple.h
+  RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+                                    "PTX (32-bit) [Experimental]");
+  RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+                                    "PTX (64-bit) [Experimental]");
+}
|