author     dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
committer  dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
commit     5ef8fd3549d38e883a31881636be3dc2a275de20 (patch)
tree       bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Target/NVPTX
parent     77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff)
parent     782067d0278612ee75d024b9b135c221c327e9e8 (diff)
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 59
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.h | 1
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.td | 22
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h | 28
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 528
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 61
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h | 12
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 265
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 11
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 285
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 18
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 70
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 49
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 29
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp | 10
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h | 50
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h | 11
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSection.h | 5
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 20
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 64
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 16
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 19
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 140
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 76
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXVector.td | 8
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp | 90
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXutil.h | 25
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp | 38
45 files changed, 1232 insertions, 945 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 80b2f62..ac92df9 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -28,13 +28,9 @@ using namespace llvm;
#include "NVPTXGenAsmWriter.inc"
-
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// Decode the virtual register
@@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 0496964..02c5a21 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -25,10 +25,11 @@ class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
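
For context, the practical effect of this pair of changes: an MCInstPrinter no longer captures a subtarget at construction time, and every printInst call supplies its own MCSubtargetInfo, so one printer instance can serve code built for mixed subtargets. A minimal sketch of the new call-site shape (TheTarget, MAI, MII, MRI, Inst, OS, and STI are assumed to be in scope; the names are illustrative, not from this patch):

    std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
        Triple(TripleName), /*SyntaxVariant=*/0, *MAI, *MII, *MRI));
    IP->printInst(&Inst, OS, /*Annot=*/"", STI);  // STI is now passed per call
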
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
InlineAsmEnd = " inline asm";
SupportsDebugInformation = CompileForDebugging;
+ // PTX does not allow .align on functions.
+ HasFunctionAlignment = false;
HasDotTypeDotSizeDirective = false;
Data8bitsDirective = " .b8 ";
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..d500105 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,52 +54,39 @@ createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
+static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new NVPTXInstPrinter(MAI, MII, MRI, STI);
+ return new NVPTXInstPrinter(MAI, MII, MRI);
return nullptr;
}
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
- createNVPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
- createNVPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
- createNVPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
- createNVPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
- createNVPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
- createNVPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
- createNVPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
- createNVPTXMCInstPrinter);
+ for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+ }
}
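
The registration refactor above is behavior-preserving: it folds the duplicated 32-bit/64-bit registrations into one loop over both Target objects. Callers are unaffected; a standalone tool still reaches LLVMInitializeNVPTXTargetMC() through the aggregate initializers, roughly like this (a sketch, with error handling reduced to a return code):

    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/TargetSelect.h"

    int main() {
      llvm::InitializeAllTargetInfos();
      llvm::InitializeAllTargetMCs();   // runs LLVMInitializeNVPTXTargetMC()
      std::string Err;
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget("nvptx64-nvidia-cuda", Err);
      return T ? 0 : 1;                 // nonzero if the target is not linked in
    }
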
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#include <stdint.h>
+
namespace llvm {
class Target;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index a3382eb..382525d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -59,7 +59,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
llvm_unreachable("Unknown condition code");
}
-ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 93fabf6..96abfa8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
"Target SM 2.1">;
def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
+def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
+ "Target SM 3.2">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
+ "Target SM 3.7">;
def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
"Target SM 5.0">;
+def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
+ "Target SM 5.2">;
+def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
+ "Target SM 5.3">;
// PTX Versions
-def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
- "Use PTX version 3.0">;
-def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
- "Use PTX version 3.1">;
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
"Use PTX version 3.2">;
def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
"Use PTX version 4.0">;
+def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
+ "Use PTX version 4.1">;
+def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
+ "Use PTX version 4.2">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_32", [SM32, PTX40]>;
def : Proc<"sm_35", [SM35]>;
-def : Proc<"sm_50", [SM50]>;
+def : Proc<"sm_37", [SM37, PTX41]>;
+def : Proc<"sm_50", [SM50, PTX40]>;
+def : Proc<"sm_52", [SM52, PTX41]>;
+def : Proc<"sm_53", [SM53, PTX42]>;
def NVPTXInstrInfo : InstrInfo {
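
The TableGen additions expose sm_32, sm_37, sm_52, and sm_53 plus PTX ISA 4.1/4.2, and the newer Proc records now bundle a minimum PTX version with the SM feature (sm_52 implies PTX 4.1, sm_53 implies PTX 4.2), which feeds the .version and .target lines the AsmPrinter emits. Typical usage, assuming a stock llc built from this tree (expected header shown as a comment, not verified output):

    llc -march=nvptx64 -mcpu=sm_52 kernel.ll -o kernel.ptx    # .version 4.1 / .target sm_52
    llc -march=nvptx64 -mcpu=sm_52 -mattr=+ptx42 kernel.ll    # override the PTX version
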
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+using namespace llvm;
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ const char *getPassName() const override {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ bool runOnFunction(Function &function) override;
+};
+} // namespace
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
return functionModified;
}
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+ NVPTXAllocaHoisting, "alloca-hoisting",
+ "Hoisting alloca instructions in non-entry blocks to the entry block",
+ false, false)
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
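
This rewrite moves the pass from the old static RegisterPass<> mechanism to the INITIALIZE_PASS machinery used by most in-tree passes, and makes the pass class file-local: only createAllocaHoisting() is exported, and the in-header class definition (including its obsolete DataLayoutPass requirement) disappears in the header diff just below. A sketch of how the backend consumes the remaining entry point; the real call site lives in NVPTXTargetMachine.cpp, outside this diff:

    // Sketch: scheduling the hoisting pass in a codegen pipeline.
    void NVPTXPassConfig::addIRPasses() {
      TargetPassConfig::addIRPasses();
      addPass(createAllocaHoisting());  // move allocas into the entry block
    }
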
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
-
class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
- static char ID; // Pass ID
- NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- const char *getPassName() const override {
- return "NVPTX specific alloca hoisting";
- }
-
- bool runOnFunction(Function &function) override;
-};
extern FunctionPass *createAllocaHoisting();
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index beec9b2..3bbea40 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugInfo.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
@@ -45,6 +47,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
using namespace llvm;
@@ -116,7 +119,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
DebugLoc curLoc = MI.getDebugLoc();
- if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ if (!prevDebugLoc && !curLoc)
return;
if (prevDebugLoc == curLoc)
@@ -124,50 +127,43 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
prevDebugLoc = curLoc;
- if (curLoc.isUnknown())
+ if (!curLoc)
return;
- const MachineFunction *MF = MI.getParent()->getParent();
- //const TargetMachine &TM = MF->getTarget();
-
- const LLVMContext &ctx = MF->getFunction()->getContext();
- DIScope Scope(curLoc.getScope(ctx));
-
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
+ auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
if (!Scope)
return;
- StringRef fileName(Scope.getFilename());
- StringRef dirName(Scope.getDirectory());
+ StringRef fileName(Scope->getFilename());
+ StringRef dirName(Scope->getDirectory());
SmallString<128> FullPathName = dirName;
if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
sys::path::append(FullPathName, fileName);
- fileName = FullPathName.str();
+ fileName = FullPathName;
}
- if (filenameMap.find(fileName.str()) == filenameMap.end())
+ if (filenameMap.find(fileName) == filenameMap.end())
return;
// Emit the line from the source file.
if (InterleaveSrc)
- this->emitSrcInText(fileName.str(), curLoc.getLine());
+ this->emitSrcInText(fileName, curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
<< " " << curLoc.getCol();
- OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+ OutStreamer->EmitRawText(temp.str());
}
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
emitLineNumberAsDotLoc(*MI);
MCInst Inst;
lowerToMCInst(MI, Inst);
- EmitToStreamer(OutStreamer, Inst);
+ EmitToStreamer(*OutStreamer, Inst);
}
// Handle symbol backtracking for targets that do not support image handles
@@ -229,19 +225,17 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
const char *Sym = MFI->getImageHandleSymbol(Index);
std::string *SymNamePtr =
nvTM.getManagedStrPool()->getManagedString(Sym);
- MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
+ MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
StringRef(SymNamePtr->c_str())));
}
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
- const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
-
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
OutMI.addOperand(GetSymbolRef(
- OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
+ OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
return;
}
@@ -249,7 +243,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- if (!ST.hasImageHandles()) {
+ if (!nvptxSubtarget->hasImageHandles()) {
if (lowerImageHandleOperand(MI, i, MCOp)) {
OutMI.addOperand(MCOp);
continue;
@@ -266,13 +260,13 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (MO.getType()) {
default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
- MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
+ MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
+ MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
@@ -288,11 +282,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (Cnt->getType()->getTypeID()) {
default: report_fatal_error("Unsupported FP type"); break;
case Type::FloatTyID:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
break;
case Type::DoubleTyID:
- MCOp = MCOperand::CreateExpr(
+ MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
break;
}
@@ -342,16 +336,16 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
- return MCOperand::CreateExpr(Expr);
+ return MCOperand::createExpr(Expr);
}
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const DataLayout *TD = TM.getDataLayout();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Type *Ty = F->getReturnType();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
if (Ty->getTypeID() == Type::VoidTyID)
return;
@@ -418,6 +412,42 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
printReturnValStr(F, O);
}
+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+ const MachineBasicBlock &MBB) const {
+ MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+ // TODO: isLoopHeader() should take "const MachineBasicBlock *".
+ // We insert .pragma "nounroll" only to the loop header.
+ if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+ return false;
+
+ // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+ // we iterate through each back edge of the loop with header MBB, and check
+ // whether its metadata contains llvm.loop.unroll.disable.
+ for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+ const MachineBasicBlock *PMBB = *I;
+ if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+ // Edges from other loops to MBB are not back edges.
+ continue;
+ }
+ if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+ if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
+ if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+ AsmPrinter::EmitBasicBlockStart(MBB);
+ if (isLoopHeaderOfNoUnroll(MBB))
+ OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str;
raw_svector_ostream O(Str);
@@ -445,39 +475,37 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
if (llvm::isKernelFunction(*F))
emitKernelFunctionDirectives(*F, O);
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
prevDebugLoc = DebugLoc();
}
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
VRegMapping.clear();
- OutStreamer.EmitRawText(StringRef("{\n"));
+ OutStreamer->EmitRawText(StringRef("{\n"));
setAndEmitFunctionVirtualRegisters(*MF);
SmallString<128> Str;
raw_svector_ostream O(Str);
emitDemotedVars(MF->getFunction(), O);
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
}
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText(StringRef("}\n"));
+ OutStreamer->EmitRawText(StringRef("}\n"));
VRegMapping.clear();
}
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
- if (TRI->isVirtualRegister(RegNo)) {
- OutStreamer.AddComment(Twine("implicit-def: ") +
- getVirtualRegisterName(RegNo));
+ if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
+ OutStreamer->AddComment(Twine("implicit-def: ") +
+ getVirtualRegisterName(RegNo));
} else {
- OutStreamer.AddComment(
- Twine("implicit-def: ") +
- TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+ OutStreamer->AddComment(Twine("implicit-def: ") +
+ nvptxSubtarget->getRegisterInfo()->getName(RegNo));
}
- OutStreamer.AddBlankLine();
+ OutStreamer->AddBlankLine();
}
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -487,15 +515,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false)
+ if (!llvm::getReqNTIDx(F, reqntidx))
reqntidx = 1;
else
specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false)
+ if (!llvm::getReqNTIDy(F, reqntidy))
reqntidy = 1;
else
specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false)
+ if (!llvm::getReqNTIDz(F, reqntidz))
reqntidz = 1;
else
specified = true;
@@ -509,15 +537,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ if (!llvm::getMaxNTIDx(F, maxntidx))
maxntidx = 1;
else
specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ if (!llvm::getMaxNTIDy(F, maxntidy))
maxntidy = 1;
else
specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ if (!llvm::getMaxNTIDz(F, maxntidz))
maxntidz = 1;
else
specified = true;
@@ -607,7 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName().str() == "llvm.used")
+ if (GV->getName() == "llvm.used")
return false;
return true;
}
@@ -622,7 +650,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName().str() == "llvm.used")
+ if (othergv->getName() == "llvm.used")
return true;
}
@@ -638,7 +666,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
}
for (const User *UU : U->users())
- if (usedInOneFunc(UU, oneFunc) == false)
+ if (!usedInOneFunc(UU, oneFunc))
return false;
return true;
@@ -652,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (gv->hasInternalLinkage() == false)
+ if (!gv->hasInternalLinkage())
return false;
const PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -661,7 +689,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
- if (flag == false)
+ if (!flag)
return false;
if (!oneFunc)
return false;
@@ -746,37 +774,45 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DbgFinder.processModule(M);
unsigned i = 1;
- for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
- StringRef Filename(DIUnit.getFilename());
- StringRef Dirname(DIUnit.getDirectory());
+ for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
+ StringRef Filename = DIUnit->getFilename();
+ StringRef Dirname = DIUnit->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
- OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ filenameMap[Filename] = i;
+ OutStreamer->EmitDwarfFileDirective(i, "", Filename);
++i;
}
- for (DISubprogram SP : DbgFinder.subprograms()) {
- StringRef Filename(SP.getFilename());
- StringRef Dirname(SP.getDirectory());
+ for (DISubprogram *SP : DbgFinder.subprograms()) {
+ StringRef Filename = SP->getFilename();
+ StringRef Dirname = SP->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
+ filenameMap[Filename] = i;
++i;
}
}
bool NVPTXAsmPrinter::doInitialization(Module &M) {
+ // Construct a default subtarget off of the TargetMachine defaults. The
+ // rest of NVPTX isn't friendly to change subtargets per function and
+ // so the default TargetMachine will have all of the options.
+ StringRef TT = TM.getTargetTriple();
+ StringRef CPU = TM.getTargetCPU();
+ StringRef FS = TM.getTargetFeatureString();
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget STI(TT, CPU, FS, NTM);
SmallString<128> Str1;
raw_svector_ostream OS1(Str1);
@@ -791,26 +827,27 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+ Mang = new Mangler(TM.getDataLayout());
// Emit header before any dwarf directives are emitted below.
- emitHeader(M, OS1);
- OutStreamer.EmitRawText(OS1.str());
+ emitHeader(M, OS1, STI);
+ OutStreamer->EmitRawText(OS1.str());
// Already commented out
//bool Result = AsmPrinter::doInitialization(M);
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- OutStreamer.AddComment("Start of file scope inline assembly");
- OutStreamer.AddBlankLine();
- OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
- OutStreamer.AddBlankLine();
- OutStreamer.AddComment("End of file scope inline assembly");
- OutStreamer.AddBlankLine();
+ OutStreamer->AddComment("Start of file scope inline assembly");
+ OutStreamer->AddBlankLine();
+ OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
+ OutStreamer->AddBlankLine();
+ OutStreamer->AddComment("End of file scope inline assembly");
+ OutStreamer->AddBlankLine();
}
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+ // If we're not NVCL we're CUDA, go ahead and emit filenames.
+ if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
recordAndEmitFilenames(M);
GlobalsEmitted = false;
@@ -848,25 +885,27 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OS2 << '\n';
- OutStreamer.EmitRawText(OS2.str());
+ OutStreamer->EmitRawText(OS2.str());
}
-void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
+ const NVPTXSubtarget &STI) {
O << "//\n";
O << "// Generated by LLVM NVPTX Back-End\n";
O << "//\n";
O << "\n";
- unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+ unsigned PTXVersion = STI.getPTXVersion();
O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
O << ".target ";
- O << nvptxSubtarget.getTargetName();
+ O << STI.getTargetName();
- if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ if (NTM.getDrvInterface() == NVPTX::NVCL)
O << ", texmode_independent";
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
- if (!nvptxSubtarget.hasDouble())
+ else {
+ if (!STI.hasDouble())
O << ", map_f64_to_f32";
}
@@ -876,7 +915,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
O << "\n";
O << ".address_size ";
- if (nvptxSubtarget.is64Bit())
+ if (NTM.is64Bit())
O << "64";
else
O << "32";
@@ -886,7 +925,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
}
bool NVPTXAsmPrinter::doFinalization(Module &M) {
-
// If we did not emit any functions, then the global declarations have not
// yet been emitted.
if (!GlobalsEmitted) {
@@ -948,7 +986,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
raw_ostream &O) {
- if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
if (V->hasExternalLinkage()) {
if (isa<GlobalVariable>(V)) {
const GlobalVariable *GVar = cast<GlobalVariable>(V);
@@ -967,7 +1005,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName().str());
+ msg.append(V->getName());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -992,7 +1030,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1103,7 +1141,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
- O << "// " << GVar->getName().str() << " has been demoted\n";
+ O << "// " << GVar->getName() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
@@ -1142,7 +1180,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
(PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
const Constant *Initializer = GVar->getInitializer();
- // 'undef' is treated as there is no value spefied.
+ // 'undef' is treated as there is no value specified.
if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
O << " = ";
printScalarConstant(Initializer, O);
@@ -1151,9 +1189,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg = "initial value of '" + GVar->getName().str() +
- "' is not allowed in addrspace(" +
- llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ std::string warnMsg =
+ ("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
report_fatal_error(warnMsg.c_str());
}
}
@@ -1180,7 +1219,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
AggBuffer aggBuffer(ElementSize, O, *this);
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
O << " .u64 " << *getSymbol(GVar) << "[";
O << ElementSize / 8;
} else {
@@ -1278,7 +1317,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
case Type::DoubleTyID:
return "f64";
case Type::PointerTyID:
- if (nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
if (useB4PTR)
return "b64";
else
@@ -1295,7 +1334,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
const PointerType *PTy = GVar->getType();
@@ -1369,50 +1408,22 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
- else {
- std::string argName = I->getName();
- const char *p = argName.c_str();
- while (*p) {
- if (*p == '.')
- O << "_";
- else
- O << *p;
- p++;
- }
- }
+ O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- Function::const_arg_iterator I, E;
- int i = 0;
-
- if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
- (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
- O << *CurrentFnSym << "_param_" << paramIndex;
- return;
- }
-
- for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
- if (i == paramIndex) {
- printParamName(I, paramIndex, O);
- return;
- }
- }
- llvm_unreachable("paramIndex out of bound");
+ O << *CurrentFnSym << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+ const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
MVT thePointerTy = TLI->getPointerTy();
O << "(\n";
@@ -1431,21 +1442,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (isImage(*I)) {
std::string sname = I->getName();
if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
else { // Default image is read_only
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
O << *CurrentFnSym << "_param_" << paramIndex;
}
} else {
- if (nvptxSubtarget.hasImageHandles())
+ if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
@@ -1455,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1478,7 +1489,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Special handling for pointer arguments to kernel
O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
- if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+ if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
+ NVPTX::CUDA) {
Type *ETy = PTy->getElementType();
int addrSpace = PTy->getAddressSpace();
switch (addrSpace) {
@@ -1607,7 +1619,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
<< getFunctionNumber() << "[" << NumBytes << "];\n";
- if (nvptxSubtarget.is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
O << "\t.reg .b64 \t%SPL;\n";
} else {
@@ -1655,7 +1667,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
}
}
- OutStreamer.EmitRawText(O.str());
+ OutStreamer->EmitRawText(O.str());
}
void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
@@ -1738,7 +1750,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
int s = TD->getTypeAllocSize(CPV->getType());
@@ -1754,12 +1766,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c =
- (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
@@ -1770,7 +1781,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1778,7 +1789,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
if (Cexpr->getOpcode() == Instruction::PtrToInt) {
Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr->getOperand(0));
aggBuffer->addZeros(4);
break;
}
@@ -1792,7 +1803,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1800,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
if (Cexpr->getOpcode() == Instruction::PtrToInt) {
Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr->getOperand(0));
aggBuffer->addZeros(8);
break;
}
@@ -1829,10 +1840,10 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
}
case Type::PointerTyID: {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- aggBuffer->addSymbol(GVar);
+ aggBuffer->addSymbol(GVar, GVar);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
- aggBuffer->addSymbol(v);
+ aggBuffer->addSymbol(v, Cexpr);
}
unsigned int s = TD->getTypeAllocSize(CPV->getType());
aggBuffer->addZeros(s);
@@ -1862,7 +1873,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+ const DataLayout *TD = TM.getDataLayout();
int Bytes;
// Old constants
@@ -1973,6 +1984,212 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
return false;
}
+/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
+/// a copy from AsmPrinter::lowerConstant, except customized to only handle
+/// expressions that are representable in PTX and create
+/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
+const MCExpr *
+NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
+ MCContext &Ctx = OutContext;
+
+ if (CV->isNullValue() || isa<UndefValue>(CV))
+ return MCConstantExpr::Create(0, Ctx);
+
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+ return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+ const MCSymbolRefExpr *Expr =
+ MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+ if (ProcessingGeneric) {
+ return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx);
+ } else {
+ return Expr;
+ }
+ }
+
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+ if (!CE) {
+ llvm_unreachable("Unknown constant value to lower!");
+ }
+
+ switch (CE->getOpcode()) {
+ default:
+ // If the code isn't optimized, there may be outstanding folding
+ // opportunities. Attempt to fold the expression using DataLayout as a
+ // last resort before giving up.
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (C != CE)
+ return lowerConstantForGV(C, ProcessingGeneric);
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/false,
+ !MF ? nullptr : MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+
+ case Instruction::AddrSpaceCast: {
+ // Strip the addrspacecast and pass along the operand
+ PointerType *DstTy = cast<PointerType>(CE->getType());
+ if (DstTy->getAddressSpace() == 0) {
+ return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
+ }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/ false,
+ !MF ? 0 : MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+
+ case Instruction::GetElementPtr: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Generate a symbolic expression for the byte address
+ APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
+ cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
+
+ const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
+ ProcessingGeneric);
+ if (!OffsetAI)
+ return Base;
+
+ int64_t Offset = OffsetAI.getSExtValue();
+ return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ Ctx);
+ }
+
+ case Instruction::Trunc:
+ // We emit the value and depend on the assembler to truncate the generated
+ // expression properly. This is important for differences between
+ // blockaddress labels. Since the two labels are in the same function, it
+ // is reasonable to treat their delta as a 32-bit value.
+ // FALL THROUGH.
+ case Instruction::BitCast:
+ return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+
+ case Instruction::IntToPtr: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Handle casts to pointers by changing them into casts to the appropriate
+ // integer type. This promotes constant folding and simplifies this code.
+ Constant *Op = CE->getOperand(0);
+ Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+ false/*ZExt*/);
+ return lowerConstantForGV(Op, ProcessingGeneric);
+ }
+
+ case Instruction::PtrToInt: {
+ const DataLayout &DL = *TM.getDataLayout();
+
+ // Support only foldable casts to/from pointers that can be eliminated by
+ // changing the pointer to the appropriately sized integer type.
+ Constant *Op = CE->getOperand(0);
+ Type *Ty = CE->getType();
+
+ const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
+
+ // We can emit the pointer value into this slot if the slot is an
+ // integer slot equal to the size of the pointer.
+ if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+ return OpExpr;
+
+ // Otherwise the pointer is smaller than the resultant integer, mask off
+ // the high bits so we are sure to get a proper truncation if the input is
+ // a constant expr.
+ unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
+ const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ }
+
+ // The MC library also has a right-shift operator, but it isn't consistently
+ // signed or unsigned between different targets.
+ case Instruction::Add: {
+ const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+ const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Unknown binary operator constant cast expr");
+ case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ }
+ }
+ }
+}
+
+// Copy of MCExpr::print customized for NVPTX
+void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
+ switch (Expr.getKind()) {
+ case MCExpr::Target:
+ return cast<MCTargetExpr>(&Expr)->PrintImpl(OS);
+ case MCExpr::Constant:
+ OS << cast<MCConstantExpr>(Expr).getValue();
+ return;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
+ const MCSymbol &Sym = SRE.getSymbol();
+ OS << Sym;
+ return;
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
+ switch (UE.getOpcode()) {
+ case MCUnaryExpr::LNot: OS << '!'; break;
+ case MCUnaryExpr::Minus: OS << '-'; break;
+ case MCUnaryExpr::Not: OS << '~'; break;
+ case MCUnaryExpr::Plus: OS << '+'; break;
+ }
+ printMCExpr(*UE.getSubExpr(), OS);
+ return;
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
+ isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
+ printMCExpr(*BE.getLHS(), OS);
+ } else {
+ OS << '(';
+ printMCExpr(*BE.getLHS(), OS);
+ OS<< ')';
+ }
+
+ switch (BE.getOpcode()) {
+ case MCBinaryExpr::Add:
+ // Print "X-42" instead of "X+-42".
+ if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+ if (RHSC->getValue() < 0) {
+ OS << RHSC->getValue();
+ return;
+ }
+ }
+
+ OS << '+';
+ break;
+ default: llvm_unreachable("Unhandled binary operator");
+ }
+
+ // Only print parens around the LHS if it is non-trivial.
+ if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+ printMCExpr(*BE.getRHS(), OS);
+ } else {
+ OS << '(';
+ printMCExpr(*BE.getRHS(), OS);
+ OS << ')';
+ }
+ return;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
/// PrintAsmOperand - Print out an operand for an inline asm expression.
///
bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -2067,16 +2284,9 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader *reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename);
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2084,7 +2294,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
temp << " ";
temp << reader->readLine(line);
temp << "\n";
- this->OutStreamer.EmitRawText(Twine(temp.str()));
+ this->OutStreamer->EmitRawText(temp.str());
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
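
Two of the larger additions in this file deserve a note. First, EmitBasicBlockStart now asks MachineLoopInfo whether the block heads a loop whose back edge carries llvm.loop.unroll.disable, and if so emits .pragma "nounroll" so that ptxas does not re-unroll a loop the frontend pinned. A sketch of the producer side of that contract, attaching the metadata to a loop latch with the C++ API (F and Latch are assumed to be in scope; names are illustrative):

    LLVMContext &Ctx = F.getContext();
    Metadata *DisableOps[] = {MDString::get(Ctx, "llvm.loop.unroll.disable")};
    MDNode *Disable = MDNode::get(Ctx, DisableOps);
    Metadata *Ops[] = {nullptr, Disable};   // slot 0 is reserved for the self-reference
    MDNode *LoopID = MDNode::getDistinct(Ctx, Ops);
    LoopID->replaceOperandWith(0, LoopID);  // loop IDs are self-referential by convention
    Latch->getTerminator()->setMetadata("llvm.loop", LoopID);

Second, lowerConstantForGV and printMCExpr give the printer a PTX-safe subset of AsmPrinter::lowerConstant and MCExpr::print, so initializers containing addrspacecast can be wrapped in generic() rather than rejected.
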
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index c11b579..301c686 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -40,6 +40,7 @@
// (subclass of MCStreamer).
namespace llvm {
+ class MCOperand;
class LineReader {
private:
@@ -86,14 +87,22 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
std::vector<unsigned char> buffer; // the buffer
SmallVector<unsigned, 4> symbolPosInBuffer;
SmallVector<const Value *, 4> Symbols;
+ // SymbolsBeforeStripping[i] is the original form of Symbols[i] before
+ // stripping pointer casts, i.e.,
+ // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts().
+ //
+ // We need to keep these values because AggBuffer::print decides whether to
+ // emit a "generic()" cast for Symbols[i] depending on the address space of
+ // SymbolsBeforeStripping[i].
+ SmallVector<const Value *, 4> SymbolsBeforeStripping;
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
bool EmitGeneric;
public:
- AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : size(_size), buffer(_size), O(_O), AP(_AP) {
+ AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), O(O), AP(AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
@@ -119,9 +128,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
return curpos;
}
- void addSymbol(const Value *GVar) {
+ void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) {
symbolPosInBuffer.push_back(curpos);
Symbols.push_back(GVar);
+ SymbolsBeforeStripping.push_back(GVarBeforeStripping);
numSymbols++;
}
void print() {
@@ -138,17 +148,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned int nSym = 0;
unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
unsigned int nBytes = 4;
- if (AP.nvptxSubtarget.is64Bit())
+ if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
nBytes = 8;
for (pos = 0; pos < size; pos += nBytes) {
if (pos)
O << ", ";
if (pos == nextSymbolPos) {
const Value *v = Symbols[nSym];
+ const Value *v0 = SymbolsBeforeStripping[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.getSymbol(GVar);
- PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
- bool IsNonGenericPointer = false;
+ PointerType *PTy = dyn_cast<PointerType>(v0->getType());
+ bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
if (PTy && PTy->getAddressSpace() != 0) {
IsNonGenericPointer = true;
}
@@ -159,8 +170,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
} else {
O << *Name;
}
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
- O << *AP.lowerConstant(Cexpr);
+ } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
+ const MCExpr *Expr =
+ AP.lowerConstantForGV(cast<Constant>(CExpr), false);
+ AP.printMCExpr(*Expr, O);
} else
llvm_unreachable("symbol type unknown");
nSym++;
@@ -187,6 +200,7 @@ private:
const Function *F;
std::string CurrentFnName;
+ void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
@@ -211,7 +225,7 @@ private:
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
- void emitHeader(Module &M, raw_ostream &O);
+ void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionExternParamList(const MachineFunction &MF);
@@ -230,6 +244,10 @@ private:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &) override;
+
+ const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
+ void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
+
protected:
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
@@ -247,8 +265,10 @@ private:
typedef DenseMap<unsigned, unsigned> VRegMap;
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
VRegRCMap VRegMapping;
- // cache the subtarget here.
- const NVPTXSubtarget &nvptxSubtarget;
+
+ // Cache the subtarget here.
+ const NVPTXSubtarget *nvptxSubtarget;
+
// Build the map between type name and ID based on module's type
// symbol table.
std::map<const Type *, std::string> TypeNameMap;
@@ -281,6 +301,8 @@ private:
MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
+ bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
LineReader *reader;
LineReader *getReader(std::string);
@@ -298,12 +320,12 @@ private:
bool EmitGeneric;
public:
- NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+ NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)),
+ EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+ NVPTX::CUDA) {
CurrentBankselLabelInBasicBlock = "";
reader = nullptr;
- EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
}
~NVPTXAsmPrinter() {
@@ -311,6 +333,15 @@ public:
delete reader;
}
+ bool runOnMachineFunction(MachineFunction &F) override {
+ nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
+ return AsmPrinter::runOnMachineFunction(F);
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AsmPrinter::getAnalysisUsage(AU);
+ }
+
bool ignoreLoc(const MachineInstr &);
std::string getVirtualRegisterName(unsigned) const;
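
A minimal sketch of the pattern the NVPTXAsmPrinter hunks above apply, with an illustrative class name: the subtarget is no longer bound in the constructor but re-queried for every machine function, since one TargetMachine may now serve functions compiled for different subtargets.

    class ExampleAsmPrinter : public AsmPrinter {
      const NVPTXSubtarget *STI = nullptr; // cached per function, not per pass

    public:
      bool runOnMachineFunction(MachineFunction &MF) override {
        // Each MachineFunction carries its own subtarget; refresh the cache
        // before the base class starts emitting.
        STI = &MF.getSubtarget<NVPTXSubtarget>();
        return AsmPrinter::runOnMachineFunction(MF);
      }

      void emitSmComment(raw_ostream &O) const {
        assert(STI && "subtarget queried outside runOnMachineFunction");
        O << "// sm_" << STI->getSmVersion() << '\n';
      }
    };
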
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 962b123..7d4be8e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -19,8 +19,8 @@
#include "NVPTX.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..ae63cae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,19 +123,17 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
// =>
// %0 = gep X, indices
// %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
- Indices,
- GEP->getName(),
- GEPI);
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+ GEP->getName(), GEPI);
NewGEPI->setIsInBounds(GEP->isInBounds());
GEP->replaceAllUsesWith(
new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
} else {
// GEP is a constant expression.
Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
- cast<Constant>(Cast->getOperand(0)),
- Indices,
- GEP->isInBounds());
+ GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
+ Indices, GEP->isInBounds());
GEP->replaceAllUsesWith(
ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
}
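
Both arms of this hunk move to the GEP-creation overloads that spell out the source element type via GEP->getSourceElementType() rather than re-deriving it from the pointer operand, groundwork for typeless pointers. A minimal sketch of the two updated entry points, assuming an i32 element type and caller-supplied Ctx, Ptr, ConstPtr, Idx, and InsertBefore:

    // Instruction form: the pointee type is now the first argument.
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        Type::getInt32Ty(Ctx), Ptr, Idx, "gep", InsertBefore);
    NewGEP->setIsInBounds(true);

    // Constant-expression form gains the same leading type.
    Constant *CE = ConstantExpr::getGetElementPtr(
        Type::getInt32Ty(Ctx), cast<Constant>(ConstPtr), Idx,
        /*InBounds=*/true);
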
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 314df38..5503494 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,15 +26,15 @@
using namespace llvm;
-NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
- is64bit(STI.is64Bit()) {}
+NVPTXFrameLowering::NVPTXFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
-void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
if (MF.getFrameInfo()->hasStackObjects()) {
- MachineBasicBlock &MBB = MF.front();
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
// Insert "mov.u32 %SP, %Depot"
MachineBasicBlock::iterator MBBI = MBB.begin();
// This instruction really occurs before the first instruction
@@ -45,7 +45,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
- if (is64bit) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
MachineInstr *MI =
BuildMI(MBB, MBBI, dl, MF.getSubtarget().getInstrInfo()->get(
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0846b78..14f8bb7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -19,18 +19,16 @@
namespace llvm {
class NVPTXSubtarget;
class NVPTXFrameLowering : public TargetFrameLowering {
- bool is64bit;
-
public:
- explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
+ explicit NVPTXFrameLowering();
bool hasFP(const MachineFunction &MF) const override;
- void emitPrologue(MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
};
} // End llvm namespace
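
emitPrologue now receives the basic block to emit into, the hook change that prepares frame lowering for shrink-wrapping. A sketch of how a target that still assumes an entry-block prologue guards the new parameter, mirroring the assert in the .cpp hunk above (target name hypothetical):

    void MyFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
      // Until shrink-wrapping support lands, the prologue must stay in the
      // entry block, so insist the caller handed us MF.front().
      assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
      MachineBasicBlock::iterator MBBI = MBB.begin();
      // ... emit the stack set-up sequence at MBBI ...
      (void)MBBI;
    }
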
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 5f06d9a..6fd09c4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -22,10 +22,10 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueMap.h"
-#include "llvm/PassManager.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -343,9 +343,11 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// GetElementPtrConstantExpr
return cast<GEPOperator>(C)->isInBounds()
? Builder.CreateGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1));
case Instruction::Select:
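
The IRBuilder overloads receive the same explicit source-element-type treatment; for a remapped constant GEP the type comes from the GEPOperator. A minimal sketch with the operand array and count assumed; pairing isInBounds() with CreateInBoundsGEP, as below, is what carries the flag over:

    GEPOperator *GO = cast<GEPOperator>(C);
    ArrayRef<Value *> Idx = makeArrayRef(&NewOperands[1], NumOperands - 1);
    Value *NewGEP =
        GO->isInBounds()
            ? Builder.CreateInBoundsGEP(GO->getSourceElementType(),
                                        NewOperands[0], Idx)
            : Builder.CreateGEP(GO->getSourceElementType(), NewOperands[0],
                                Idx);
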
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd0422d..fa38a68 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -50,11 +50,15 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel),
- Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
+ : SelectionDAGISel(tm, OptLevel), TM(tm) {
doMulWide = (OptLevel > 0);
}
+bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
int NVPTXDAGToDAGISel::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -74,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
@@ -89,16 +90,14 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
const Function *F = MF->getFunction();
// Otherwise, check for an nvptx-f32ftz attribute on the function
if (F->hasFnAttribute("nvptx-f32ftz"))
- return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
- "nvptx-f32ftz")
- .getValueAsString() == "true");
+ return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
else
return false;
}
}
bool NVPTXDAGToDAGISel::allowFMA() const {
- const NVPTXTargetLowering *TL = Subtarget.getTargetLowering();
+ const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
return TL->allowFMA(*MF, OptLevel);
}
@@ -525,8 +524,7 @@ SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
}
}
-static unsigned int getCodeAddrSpace(MemSDNode *N,
- const NVPTXSubtarget &Subtarget) {
+static unsigned int getCodeAddrSpace(MemSDNode *N) {
const Value *Src = N->getMemOperand()->getValue();
if (!Src)
@@ -579,20 +577,16 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (SrcAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
- : NVPTX::cvta_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
- : NVPTX::cvta_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
- : NVPTX::cvta_const_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
- : NVPTX::cvta_local_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -604,20 +598,20 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
switch (DstAddrSpace) {
default: report_fatal_error("Bad address space in addrspacecast");
case ADDRESS_SPACE_GLOBAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
- : NVPTX::cvta_to_global_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
+ : NVPTX::cvta_to_global_yes;
break;
case ADDRESS_SPACE_SHARED:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
- : NVPTX::cvta_to_shared_yes;
+ Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
+ : NVPTX::cvta_to_shared_yes;
break;
case ADDRESS_SPACE_CONST:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
- : NVPTX::cvta_to_const_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
break;
case ADDRESS_SPACE_LOCAL:
- Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
- : NVPTX::cvta_to_local_yes;
+ Opc =
+ TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
@@ -638,7 +632,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(LD);
// Volatile Setting
// - .volatile is only available for .global and .shared
@@ -709,13 +703,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
- : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_asi;
@@ -738,14 +731,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
- : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+ : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_ari_64;
@@ -792,12 +784,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (TargetVT) {
case MVT::i8:
Opcode = NVPTX::LD_i8_areg_64;
@@ -844,9 +836,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), N1, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), N1, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
@@ -874,7 +866,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
// Volatile Setting
// - .volatile is only available for .global and .shared
@@ -970,13 +962,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1024,14 +1015,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1127,13 +1117,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1229,9 +1219,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Op1, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
@@ -1425,10 +1415,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
SDValue Ops[] = { Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
- : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+ : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -1710,7 +1699,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2013,7 +2002,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
// Address Space Setting
- unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+ unsigned int codeAddrSpace = getCodeAddrSpace(ST);
// Volatile Setting
// - .volatile is only available for .global and .shared
@@ -2079,13 +2068,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Addr, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_asi;
@@ -2108,14 +2097,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_ari_64;
@@ -2162,12 +2151,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (SourceVT) {
case MVT::i8:
Opcode = NVPTX::ST_i8_areg_64;
@@ -2214,9 +2204,10 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), N2, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
@@ -2241,7 +2232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
EVT StoreVT = MemSD->getMemoryVT();
// Address Space Setting
- unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
+ unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
report_fatal_error("Cannot store to pointer that points to constant "
@@ -2290,11 +2281,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
return nullptr;
}
- StOps.push_back(getI32Imm(IsVolatile));
- StOps.push_back(getI32Imm(CodeAddrSpace));
- StOps.push_back(getI32Imm(VecType));
- StOps.push_back(getI32Imm(ToType));
- StOps.push_back(getI32Imm(ToTypeWidth));
+ StOps.push_back(getI32Imm(IsVolatile, DL));
+ StOps.push_back(getI32Imm(CodeAddrSpace, DL));
+ StOps.push_back(getI32Imm(VecType, DL));
+ StOps.push_back(getI32Imm(ToType, DL));
+ StOps.push_back(getI32Imm(ToTypeWidth, DL));
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
@@ -2344,9 +2335,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
break;
}
StOps.push_back(Addr);
- } else if (Subtarget.is64Bit()
- ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
- : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+ } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2395,10 +2385,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
}
StOps.push_back(Base);
StOps.push_back(Offset);
- } else if (Subtarget.is64Bit()
- ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
- : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (Subtarget.is64Bit()) {
+ } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+ : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2496,7 +2485,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
StOps.push_back(Base);
StOps.push_back(Offset);
} else {
- if (Subtarget.is64Bit()) {
+ if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return nullptr;
@@ -2725,13 +2714,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
SmallVector<SDValue, 2> Ops;
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, DL, VTs, Ops);
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
}
SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
@@ -2761,7 +2748,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
SmallVector<SDValue, 6> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 2));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
// Determine target opcode
@@ -2889,8 +2876,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 3));
- Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
@@ -2985,7 +2972,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
// the selected StoreParam node.
case NVPTXISD::StoreParamU32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
MVT::i32, Ops[0], CvtNone);
@@ -2994,7 +2981,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
}
case NVPTXISD::StoreParamS32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
MVT::i32, Ops[0], CvtNone);
@@ -4742,6 +4729,7 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
+ SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Len;
@@ -4772,8 +4760,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
// How many bits are in our mask?
- uint64_t NumBits = CountTrailingOnes_64(MaskVal);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ uint64_t NumBits = countTrailingOnes(MaskVal);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
// We have a 'srl/and' pair; extract the effective start bit and length
@@ -4791,7 +4779,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
// emitting the srl/and pair.
return NULL;
}
- Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
+ Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
} else {
// Do not handle the case where the shift amount (which can be zero if no
// srl was found) is not constant. We could handle this case, but it would
@@ -4836,10 +4824,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
NumZeros = 0;
// The number of bits in the result bitfield will be the number of
// trailing ones (the AND) minus the number of bits we shift off
- NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+ NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
} else if (isShiftedMask_64(MaskVal)) {
NumZeros = countTrailingZeros(MaskVal);
- unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+ unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
// The number of bits in the result bitfield will be the number of
// trailing zeros plus the number of set bits in the mask minus the
// number of bits we shift off
@@ -4856,8 +4844,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
Val = AndLHS;
- Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
} else if (LHS->getOpcode() == ISD::SHL) {
// Here, we have a pattern like:
//
@@ -4897,10 +4885,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
}
Start =
- CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
+ CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
Len =
CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
- OuterShiftAmt, MVT::i32);
+ OuterShiftAmt, DL, MVT::i32);
if (N->getOpcode() == ISD::SRA) {
// If we have an arithmetic right shift, we need to use the signed bfe
@@ -4941,10 +4929,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
Val, Start, Len
};
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
-
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
}
// SelectDirectAddr - Match a direct address for DAG.
@@ -4976,7 +4961,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
@@ -5001,7 +4987,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
- Offset = CurDAG->getTargetConstant(0, mvt);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
@@ -5019,7 +5005,8 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
@@ -5056,15 +5043,15 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default:
return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
- OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
}
if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
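
The SelectBFE hunks above fold (x >> start) & mask chains into PTX's single bfe bit-field extract, taking the field width from the run of trailing ones in the mask, and migrate from CountTrailingOnes_64 to the templated countTrailingOnes. A standalone sketch of just that width/start arithmetic:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // If (X >> ShiftAmt) & Mask is a clean bit-field extract -- that is,
    // Mask is a run of trailing ones -- report the bfe start bit and length.
    static bool matchBFEOperands(uint64_t Mask, unsigned ShiftAmt,
                                 unsigned &Start, unsigned &Len) {
      if (!llvm::isMask_64(Mask))
        return false;                      // not of the form 0...01...1
      Start = ShiftAmt;                    // first bit kept
      Len = llvm::countTrailingOnes(Mask); // width of the kept field
      return true;
    }
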
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 69afcd7..fe20580 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -26,6 +26,7 @@ using namespace llvm;
namespace {
class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+ const NVPTXTargetMachine &TM;
// If true, generate mul.wide from sext and mul
bool doMulWide;
@@ -43,11 +44,11 @@ public:
const char *getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
-
- const NVPTXSubtarget &Subtarget;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
@@ -70,8 +71,8 @@ private:
SDNode *SelectSurfaceIntrinsic(SDNode *N);
SDNode *SelectBFE(SDNode *N);
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
+ return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
// Match direct address complex pattern.
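
Every getTargetConstant call in the .cpp hunks above gained an SDLoc, since constants now carry a debug location like any other node; this helper change is that migration in miniature. A sketch of a call site under the new signature:

    SDLoc DL(N);                       // location of the node being selected
    SDValue Width = getI32Imm(32, DL); // forwards to getTargetConstant(32, DL, MVT::i32)
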
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 093ba1a..805847a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -106,9 +106,9 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
}
// NVPTXTargetLowering Constructor.
-NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
- : TargetLowering(TM), nvTM(&TM),
- nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI)
+ : TargetLowering(TM), nvTM(&TM), STI(STI) {
// always lower memset, memcpy, and memmove intrinsics to load/store
// instructions, rather
@@ -167,14 +167,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
- if (nvptxSubtarget.hasROT64()) {
+ if (STI.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
} else {
setOperationAction(ISD::ROTL, MVT::i64, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
}
- if (nvptxSubtarget.hasROT32()) {
+ if (STI.hasROT32()) {
setOperationAction(ISD::ROTL, MVT::i32, Legal);
setOperationAction(ISD::ROTR, MVT::i32, Legal);
} else {
@@ -259,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // PTX does not directly support SELP of i1, so promote to i32 first
+ setOperationAction(ISD::SELECT, MVT::i1, Custom);
+
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AND);
@@ -268,17 +271,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
// Now deduce the information based on the above-mentioned
// actions
- computeRegisterProperties();
+ computeRegisterProperties(STI.getRegisterInfo());
}
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- return nullptr;
+ switch ((NVPTXISD::NodeType)Opcode) {
+ case NVPTXISD::FIRST_NUMBER:
+ break;
case NVPTXISD::CALL:
return "NVPTXISD::CALL";
case NVPTXISD::RET_FLAG:
return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::LOAD_PARAM:
+ return "NVPTXISD::LOAD_PARAM";
case NVPTXISD::Wrapper:
return "NVPTXISD::Wrapper";
case NVPTXISD::DeclareParam:
@@ -287,10 +292,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::DeclareScalarParam";
case NVPTXISD::DeclareRet:
return "NVPTXISD::DeclareRet";
+ case NVPTXISD::DeclareScalarRet:
+ return "NVPTXISD::DeclareScalarRet";
case NVPTXISD::DeclareRetParam:
return "NVPTXISD::DeclareRetParam";
case NVPTXISD::PrintCall:
return "NVPTXISD::PrintCall";
+ case NVPTXISD::PrintCallUni:
+ return "NVPTXISD::PrintCallUni";
case NVPTXISD::LoadParam:
return "NVPTXISD::LoadParam";
case NVPTXISD::LoadParamV2:
@@ -363,6 +372,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
+ case NVPTXISD::Dummy:
+ return "NVPTXISD::Dummy";
case NVPTXISD::MUL_WIDE_SIGNED:
return "NVPTXISD::MUL_WIDE_SIGNED";
case NVPTXISD::MUL_WIDE_UNSIGNED:
@@ -852,6 +863,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero";
case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero";
}
+ return nullptr;
}
TargetLoweringBase::LegalizeTypeAction
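
Casting the opcode to NVPTXISD::NodeType and dropping the default: makes the name switch exhaustive: a newly added node with no name string now draws a -Wswitch warning instead of silently falling through to null. The shape, reduced to a standalone sketch with an illustrative enum:

    enum class Op { A, B };

    static const char *opName(Op O) {
      switch (O) { // no default: the compiler flags any unhandled enumerator
      case Op::A:
        return "A";
      case Op::B:
        return "B";
      }
      return nullptr; // unreachable; keeps -Wreturn-type quiet
    }
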
@@ -876,7 +888,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
unsigned retAlignment,
const ImmutableCallSite *CS) const {
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return "";
@@ -927,7 +939,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
}
first = false;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
@@ -1041,7 +1053,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *retTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -1050,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const Function *F = MF.getFunction();
SDValue tempChain = Chain;
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, dl, true),
+ dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1072,7 +1084,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
@@ -1083,9 +1095,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = TD->getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32), InFlag };
+ SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
+ MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1100,8 +1114,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(Offsets[j], MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(Offsets[j], dl, MVT::i32),
StVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
CopyParamVTs, CopyParamOps,
@@ -1121,9 +1135,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = TD->getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32),
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32), InFlag };
+ SDValue DeclareParamOps[] = { Chain,
+ DAG.getConstant(align, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1144,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), Elt,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), Elt,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
CopyParamVTs, CopyParamOps,
@@ -1161,9 +1177,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), Elt0, Elt1,
- InFlag };
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), Elt0,
+ Elt1, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
@@ -1193,8 +1209,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue StoreVal;
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(paramCount, MVT::i32));
- Ops.push_back(DAG.getConstant(curOffset, MVT::i32));
+ Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
+ Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
unsigned Opc = NVPTXISD::StoreParamV2;
@@ -1261,9 +1277,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(sz, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
@@ -1276,8 +1292,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(0, MVT::i32), OutV, InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), OutV,
+ InFlag };
unsigned opcode = NVPTXISD::StoreParam;
if (Outs[OIdx].Flags.isZExt())
@@ -1306,9 +1324,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32),
- DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
- InFlag
+ Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag
};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
@@ -1319,7 +1337,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
SDValue srcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, getPointerTy()));
+ DAG.getConstant(curOffset, dl, getPointerTy()));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
MachinePointerInfo(), false, false, false,
PartAlign);
@@ -1327,9 +1345,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
}
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32), theVal,
- InFlag };
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(curOffset, dl, MVT::i32),
+ theVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
CopyParamOps, elemtype,
MachinePointerInfo());
@@ -1361,9 +1380,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (resultsz < 32)
resultsz = 32;
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(resultsz, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(resultsz, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1371,9 +1390,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain,
- DAG.getConstant(retAlignment, MVT::i32),
- DAG.getConstant(resultsz / 8, MVT::i32),
- DAG.getConstant(0, MVT::i32), InFlag };
+ DAG.getConstant(retAlignment, dl, MVT::i32),
+ DAG.getConstant(resultsz / 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
InFlag = Chain.getValue(1);
@@ -1401,7 +1420,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrintCallOps[] = {
- Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
+ Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
};
Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
dl, PrintCallVTs, PrintCallOps);
@@ -1427,20 +1446,22 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
- DAG.getConstant(i, MVT::i32), InFlag };
+ SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(i, dl, MVT::i32), InFlag };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
+ SDValue CallArgEndOps[] = { Chain,
+ DAG.getConstant(Func ? 1 : 0, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
+ SDValue PrototypeOps[] = { Chain,
+ DAG.getConstant(uniqueCallSite, dl, MVT::i32),
InFlag };
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
@@ -1452,11 +1473,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
- assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters(
- F->getContext(), ObjectVT) == NumElts &&
+ assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
+ ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (NumElts == 1) {
// Just a simple load
@@ -1471,11 +1492,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(EltVT);
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1501,11 +1519,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParamV2, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
@@ -1547,11 +1562,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
Opc, dl, DAG.getVTList(LoadRetVTs),
LoadRetOps, EltVT, MachinePointerInfo());
@@ -1583,7 +1595,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1605,11 +1617,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- SmallVector<SDValue, 4> LoadRetOps;
- LoadRetOps.push_back(Chain);
- LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
- LoadRetOps.push_back(InFlag);
+ SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Offsets[i], dl, MVT::i32),
+ InFlag};
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps,
@@ -1624,8 +1634,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
- DAG.getIntPtrConstant(uniqueCallSite + 1, true),
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(uniqueCallSite, dl, true),
+ DAG.getIntPtrConstant(uniqueCallSite + 1, dl,
+ true),
InFlag, dl);
uniqueCallSite++;
@@ -1651,7 +1663,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
unsigned NumSubElem = VVT.getVectorNumElements();
for (unsigned j = 0; j < NumSubElem; ++j) {
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
- DAG.getIntPtrConstant(j)));
+ DAG.getIntPtrConstant(j, dl)));
}
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
@@ -1675,7 +1687,7 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShAmt = Op.getOperand(2);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32-bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} >> Amt
@@ -1700,16 +1712,18 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
// dHi = aHi >> Amt
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
- DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ISD::SETGE);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
@@ -1735,7 +1749,7 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+ if (VTBits == 32 && STI.getSmVersion() >= 35) {
// For 32-bit and sm35, we can use the funnel shift 'shf' instruction.
// {dHi, dLo} = {aHi, aLo} << Amt
@@ -1760,16 +1774,18 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
// dHi = (aHi << Amt) | (aLo >> (size-Amt))
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
- DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
- DAG.getConstant(VTBits, MVT::i32));
+ DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
- DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ DAG.getConstant(VTBits, dl, MVT::i32),
+ ISD::SETGE);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
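
On targets without the funnel shift, both shift-parts lowerings assemble a 64-bit shift from 32-bit halves, per the recipe in the comments: combine the two shifted halves when the amount is below the word size, otherwise shift the far half by the excess. A standalone sketch of the left-shift case (the Amt == 0 guard stands in for shift-by-width behavior the DAG leaves to the target):

    #include <cstdint>

    // {Hi, Lo} = {aHi, aLo} << Amt, with 32-bit halves and 0 <= Amt < 64.
    static void shlParts(uint32_t aLo, uint32_t aHi, unsigned Amt,
                         uint32_t &Lo, uint32_t &Hi) {
      if (Amt >= 32) {                      // the SETGE select picks TrueVal
        Hi = aLo << (Amt - 32);             // low half shifted by the excess
        Lo = 0;
      } else {
        Hi = (aHi << Amt) |                 // Tmp1
             (Amt ? aLo >> (32 - Amt) : 0); // Tmp2, guarding Amt == 0
        Lo = aLo << Amt;
      }
    }
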
@@ -1803,11 +1819,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
+ case ISD::SELECT:
+ return LowerSelect(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Op0 = Op->getOperand(0);
+ SDValue Op1 = Op->getOperand(1);
+ SDValue Op2 = Op->getOperand(2);
+ SDLoc DL(Op.getNode());
+
+ assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+ return Trunc;
+}
+
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
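
LowerSelect above realizes the comment added earlier ("PTX does not directly support SELP of i1"): both value operands are any-extended to i32, the select happens at that width, and the result is truncated back. The scalar effect, as a small sketch:

    // select i1 c, a, b  ==>  trunc (select c, ext(a), ext(b)) to i1
    static bool selectI1(bool C, bool A, bool B) {
      int Wide = C ? int(A) : int(B); // the widened i32 select
      return Wide & 1;                // truncate back down to one bit
    }
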
@@ -1924,16 +1958,14 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// Then the split values
for (unsigned i = 0; i < NumElts; ++i) {
SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
- DAG.getIntPtrConstant(i));
+ DAG.getIntPtrConstant(i, DL));
if (NeedExt)
ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
Ops.push_back(ExtVal);
}
// Then any remaining arguments
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.append(N->op_begin() + 2, N->op_end());
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
@@ -2029,13 +2061,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering();
+ const TargetLowering *TLI = STI.getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
bool isKernel = llvm::isKernelFunction(*F);
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -2070,7 +2102,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
: nullptr))) {
assert(isKernel && "Only kernels can have image/sampler params");
- InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
+ InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
continue;
}
@@ -2109,7 +2141,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as they appear
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
@@ -2132,7 +2164,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
llvm::ADDRESS_SPACE_PARAM));
SDValue srcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(offsets[parti], getPointerTy()));
+ DAG.getConstant(offsets[parti], dl, getPointerTy()));
unsigned partAlign =
aggregateIsPacked ? 1
: TD->getABITypeAlignment(
@@ -2197,9 +2229,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
P.getNode()->setIROrder(idx + 1);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0, dl));
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(1));
+ DAG.getIntPtrConstant(1, dl));
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
@@ -2232,7 +2264,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
llvm::ADDRESS_SPACE_PARAM));
SDValue SrcAddr =
DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, getPointerTy()));
+ DAG.getConstant(Ofst, dl, getPointerTy()));
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
false, true,
@@ -2244,7 +2276,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (i + j >= NumElts)
break;
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(j));
+ DAG.getIntPtrConstant(j, dl));
if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
@@ -2302,7 +2334,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
else {
SDValue p2 = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
InVals.push_back(p2);
}
}
@@ -2333,7 +2365,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Type *RetTy = F->getReturnType();
const DataLayout *TD = getDataLayout();
- bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+ bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
@@ -2356,7 +2388,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// We only have one element, so just directly store it
if (NeedExtend)
StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
+ SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
@@ -2371,7 +2403,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
}
- SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
+ SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
StoreVal1 };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
DAG.getVTList(MVT::Other), Ops,
@@ -2403,7 +2435,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue StoreVal;
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(Offset, MVT::i32));
+ Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
unsigned Opc = NVPTXISD::StoreRetvalV2;
EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
@@ -2468,7 +2500,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (TheValType.isVector())
TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
TheValType.getVectorElementType(), TmpVal,
- DAG.getIntPtrConstant(j));
+ DAG.getIntPtrConstant(j, dl));
EVT TheStoreType = ValVTs[i];
if (RetTy->isIntegerTy() &&
TD->getTypeAllocSizeInBits(RetTy) < 32) {
@@ -2482,7 +2514,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = {
Chain,
- DAG.getConstant(Offsets[i], MVT::i32),
+ DAG.getConstant(Offsets[i], dl, MVT::i32),
TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
@@ -3753,7 +3785,8 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
}
std::pair<unsigned, const TargetRegisterClass *>
-NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3774,7 +3807,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
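// Illustrative sketch (not part of the patch): how this hook is exercised.
// The mapping of "r" to the 32-bit integer register class is an assumption
// based on the constraint switch elided from this hunk.
int add_via_ptx(int a, int b) {
  int out;
  // "=r" and "r" are single-letter constraints resolved by the switch above;
  // unrecognized letters fall through to TargetLowering's default handling.
  asm("add.s32 %0, %1, %2;" : "=r"(out) : "r"(a), "r"(b));
  return out;
}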
/// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -3885,7 +3918,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const SDNode *left = N0.getOperand(0).getNode();
const SDNode *right = N0.getOperand(1).getNode();
- if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
+ if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
opIsLive = true;
if (!opIsLive)
@@ -4103,6 +4136,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
return SDValue();
}
+ SDLoc DL(N);
unsigned OptSize = MulType.getSizeInBits() >> 1;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -4125,7 +4159,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
unsigned BitWidth = MulType.getSizeInBits();
if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
- RHS = DCI.DAG.getConstant(MulVal, MulType);
+ RHS = DCI.DAG.getConstant(MulVal, DL, MulType);
} else {
return SDValue();
}
@@ -4147,9 +4181,9 @@ static SDValue TryMULWIDECombine(SDNode *N,
// Truncate the operands to the correct size. Note that these are just for
// type consistency and will (likely) be eliminated in later phases.
SDValue TruncLHS =
- DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
+ DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS);
SDValue TruncRHS =
- DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
+ DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS);
unsigned Opc;
if (Signed) {
@@ -4158,7 +4192,7 @@ static SDValue TryMULWIDECombine(SDNode *N,
Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
}
- return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
+ return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS);
}
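// Illustrative sketch (not part of the patch): the net effect of
// TryMULWIDECombine as plain arithmetic. A 64-bit multiply (or a
// shift-by-constant first rewritten into a multiply) whose operands are
// known to fit in 32 bits becomes a widening 32x32->64 multiply, which
// selects to PTX mul.wide.s32 / mul.wide.u32.
static inline long long mul_wide_s32(int a, int b) {
  // Truncated operands, widened result: what MUL_WIDE_SIGNED computes.
  return static_cast<long long>(a) * static_cast<long long>(b);
}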
/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
@@ -4196,7 +4230,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD:
case ISD::FADD:
- return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
+ return PerformADDCombine(N, DCI, STI, OptLevel);
case ISD::MUL:
return PerformMULCombine(N, DCI, OptLevel);
case ISD::SHL:
@@ -4281,15 +4315,12 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
}
}
- SmallVector<SDValue, 8> OtherOps;
-
// Copy regular operands
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end());
// The select routine does not have access to the LoadSDNode instance, so
// pass along the extension information
- OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
+ OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL));
SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
LD->getMemoryVT(),
@@ -4398,8 +4429,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
OtherOps.push_back(Chain); // Chain
// Skip operand 1 (intrinsic ID)
// Others
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
- OtherOps.push_back(N->getOperand(i));
+ OtherOps.append(N->op_begin() + 2, N->op_end());
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
@@ -4430,9 +4460,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
"Custom handling of non-i8 ldu/ldg?");
// Just copy all operands as-is
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
+ SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
// Force output to i16
SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
@@ -4490,10 +4518,9 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfLocSection;
delete DwarfARangesSection;
delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
}
-const MCSection *
+MCSection *
NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index b3fea3f..5142ae3 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -21,7 +21,7 @@
namespace llvm {
namespace NVPTXISD {
-enum NodeType {
+enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
Wrapper,
@@ -436,7 +436,8 @@ class NVPTXSubtarget;
//===--------------------------------------------------------------------===//
class NVPTXTargetLowering : public TargetLowering {
public:
- explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM);
+ explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
+ const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -469,7 +470,8 @@ public:
ConstraintType
getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint,
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const override;
SDValue LowerFormalArguments(
@@ -495,6 +497,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
@@ -510,7 +518,7 @@ public:
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
private:
- const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
+ const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
EVT = MVT::i32) const;
@@ -529,6 +537,8 @@ private:
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 740ca03..dabc3be 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -28,9 +28,7 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
-// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
- : NVPTXGenInstrInfo(), RegInfo(STI) {}
+NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {}
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 6de7536..9b5d491 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -27,7 +27,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
const NVPTXRegisterInfo RegInfo;
virtual void anchor();
public:
- explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
+ explicit NVPTXInstrInfo();
const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 2c571c4..6fdd60f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -117,24 +117,24 @@ def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
//===----------------------------------------------------------------------===//
-def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
-def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
-def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
def useAtomRedG32forGen32 :
- Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
-def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
-def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
-def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
-def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+ Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
- Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
-def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
-def hasVote : Predicate<"Subtarget.hasVote()">;
-def hasDouble : Predicate<"Subtarget.hasDouble()">;
-def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
-def hasLDG : Predicate<"Subtarget.hasLDG()">;
-def hasLDU : Predicate<"Subtarget.hasLDU()">;
-def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+ Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget->hasVote()">;
+def hasDouble : Predicate<"Subtarget->hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
+def hasLDG : Predicate<"Subtarget->hasLDG()">;
+def hasLDU : Predicate<"Subtarget->hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
@@ -150,12 +150,12 @@ def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
-def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
-def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
+def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def true : Predicate<"1">;
-def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
//===----------------------------------------------------------------------===//
@@ -452,13 +452,13 @@ def Int4Const : PatLeaf<(imm), [{
def SHL2MUL32 : SDNodeXForm<imm, [{
const APInt &v = N->getAPIntValue();
APInt temp(32, 1);
- return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
+ return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
}]>;
def SHL2MUL16 : SDNodeXForm<imm, [{
const APInt &v = N->getAPIntValue();
APInt temp(16, 1);
- return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
+ return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;
def MULWIDES64
@@ -1138,7 +1138,7 @@ def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
[]>;
def SUB_FRM_32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
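// Illustrative note (not part of the patch): SUB_FRM_32 encodes the rotate
// identity the patterns below rely on, rotl(x, n) == rotr(x, 32 - n) for
// 0 < n < 32. In C++ terms, the software expansion is roughly:
//   unsigned rotl32(unsigned x, unsigned n) { // n in (0, 32)
//     return (x << n) | (x >> (32 - n));
//   }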
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
@@ -1189,7 +1189,7 @@ def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
[]>;
def SUB_FRM_64 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
+ return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
@@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
-// Special select for predicate operands
-def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
- (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
- (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-
//
// Funnel shift in clamp mode
//
@@ -1659,12 +1654,12 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}
-defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
+defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
+defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
+defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
+defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
+defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
+defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
@@ -1673,6 +1668,13 @@ defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
+defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
+defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
+defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
+defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
+defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
+defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
+
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f0c3663..6ab0fad 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -22,10 +24,33 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "nvptx"
using namespace llvm;
-namespace llvm { FunctionPass *createLowerAggrCopies(); }
+namespace {
+// Actual analysis class, which is a FunctionPass.
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ const char *getPassName() const override {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+} // namespace
char NVPTXLowerAggrCopies::ID = 0;
@@ -45,8 +70,8 @@ static void convertTransferToLoop(
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -59,9 +84,11 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
- Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
+ srcVolatile);
// store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+ loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
+ dstVolatile);
// The value for ind coming from backedge is (ind + 1)
Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
@@ -81,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
dstAddr =
@@ -91,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
PHINode *ind = loop.CreatePHI(len->getType(), 0);
ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+ loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
ind->addIncoming(newind, loopBB);
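// Illustrative sketch (not part of the patch): the IR built by the two
// helpers corresponds to these loops; the loop-exit tests sit outside the
// hunks shown, so only the bodies are sketched.
void transferLoop(char *dst, const char *src, unsigned len) {
  for (unsigned ind = 0; ind < len; ++ind) // PHI 'ind', +1 on the backedge
    dst[ind] = src[ind]; // load srcAddr+ind, store dstAddr+ind
}
void memsetLoop(char *dst, char val, unsigned len) {
  for (unsigned ind = 0; ind < len; ++ind)
    dst[ind] = val;      // convertMemSetToLoop stores 'val' at dstAddr+ind
}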
@@ -104,7 +131,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<MemTransferInst *, 4> aggrMemcpys;
SmallVector<MemSetInst *, 4> aggrMemsets;
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
//
@@ -117,10 +144,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false)
+ if (!load->hasOneUse())
continue;
- if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
continue;
User *use = load->user_back();
@@ -166,7 +193,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL->getTypeStoreSize(load->getType());
+ unsigned numLoads = DL.getTypeStoreSize(load->getType());
Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index da301d5..3c39f53 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
+class FunctionPass;
-// actual analysis class, which is a functionpass
-struct NVPTXLowerAggrCopies : public FunctionPass {
- static char ID;
-
- NVPTXLowerAggrCopies() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- bool runOnFunction(Function &F) override;
-
- static const unsigned MaxAggrCopySize = 128;
-
- const char *getPassName() const override {
- return "Lower aggregate copies/intrinsics into loops";
- }
-};
-
-extern FunctionPass *createLowerAggrCopies();
+FunctionPass *createLowerAggrCopies();
}
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
index 3149399..68dfbb7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -35,7 +35,8 @@ namespace llvm {
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
-class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+namespace {
+class NVPTXLowerStructArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
void handleStructPtrArgs(Function &);
@@ -48,6 +49,7 @@ public:
return "Copy structure (byval *) arguments to stack";
}
};
+} // namespace
char NVPTXLowerStructArgs::ID = 1;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 137248b..779b65e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -45,3 +45,13 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
OS << std::string(NumHex - HexStr.length(), '0');
OS << utohexstr(API.getZExtValue());
}
+
+const NVPTXGenericMCSymbolRefExpr*
+NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr,
+ MCContext &Ctx) {
+ return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr);
+}
+
+void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ OS << "generic(" << *SymExpr << ")";
+}
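// Illustrative sketch (not part of the patch): intended use of the new
// expression kind, assuming an MCContext 'Ctx' and MCSymbol 'Sym' in scope.
// Printing 'Expr' yields "generic(Sym)" instead of the raw symbol.
const MCSymbolRefExpr *Ref = MCSymbolRefExpr::Create(Sym, Ctx);
const NVPTXGenericMCSymbolRefExpr *Expr =
    NVPTXGenericMCSymbolRefExpr::Create(Ref, Ctx);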
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index d39a394..8c6b219 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -29,8 +29,8 @@ private:
const VariantKind Kind;
const APFloat Flt;
- explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt)
- : Kind(_Kind), Flt(_Flt) {}
+ explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt)
+ : Kind(Kind), Flt(Flt) {}
public:
/// @name Construction
@@ -68,9 +68,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- const MCSection *FindAssociatedSection() const override {
- return nullptr;
- }
+ MCSection *FindAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -79,6 +77,48 @@ public:
return E->getKind() == MCExpr::Target;
}
};
+
+/// A wrapper for MCSymbolRefExpr that tells the assembly printer that the
+/// symbol should be enclosed by generic().
+class NVPTXGenericMCSymbolRefExpr : public MCTargetExpr {
+private:
+ const MCSymbolRefExpr *SymExpr;
+
+ explicit NVPTXGenericMCSymbolRefExpr(const MCSymbolRefExpr *_SymExpr)
+ : SymExpr(_SymExpr) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const NVPTXGenericMCSymbolRefExpr
+ *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getSymbolExpr - Get the MCSymbolRefExpr that this expression wraps.
+ const MCSymbolRefExpr *getSymbolExpr() const { return SymExpr; }
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const override;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override {
+ return false;
+ }
+  void visitUsedExpr(MCStreamer &Streamer) const override {}
+ MCSection *FindAssociatedSection() const override { return nullptr; }
+
+ // There are no TLS NVPTXMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+ };
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index a1e1b9e..5fd69a6 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -48,9 +48,9 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
char NVPTXPrologEpilogPass::ID = 0;
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering();
- const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *STI.getFrameLowering();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
bool Modified = false;
calculateFrameObjectOffsets(MF);
@@ -68,7 +68,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
}
// Add function prolog/epilog
- TFI.emitPrologue(MF);
+ TFI.emitPrologue(MF, MF.front());
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 358ccce..6e97f9e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -71,15 +71,14 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
}
}
-NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
- : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
+NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
#define GET_REGINFO_TARGET_DESC
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
const MCPhysReg *
-NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
index d2e6733..c310a9c 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -22,27 +22,20 @@
#include "NVPTXGenRegisterInfo.inc"
namespace llvm {
-
-// Forward Declarations.
-class TargetInstrInfo;
-class NVPTXSubtarget;
-
class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
private:
- bool Is64Bit;
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
ManagedStringPool ManagedStrPool;
public:
- NVPTXRegisterInfo(const NVPTXSubtarget &st);
+ NVPTXRegisterInfo();
//------------------------------------------------------
// Pure virtual functions from TargetRegisterInfo
//------------------------------------------------------
// NVPTX callee saved registers
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index b7f53c7..e83f735 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -16,6 +16,7 @@
#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -142,8 +143,9 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
- const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
- if (ST.getDrvInterface() == NVPTX::CUDA) {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ if (TM.getDrvInterface() == NVPTX::CUDA) {
// For CUDA, we preserve the param loads coming from function arguments
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index f1d3cb4..0d2627d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -26,7 +26,7 @@ namespace llvm {
class NVPTXSection : public MCSection {
virtual void anchor();
public:
- NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
@@ -36,11 +36,8 @@ public:
const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- bool isBaseAddressKnownZero() const override { return true; }
bool UseCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
- std::string getLabelBeginName() const override { return ""; }
- std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 3d52532..069d6e1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
using namespace llvm;
@@ -25,17 +26,6 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
-static std::string computeDataLayout(bool is64Bit) {
- std::string Ret = "e";
-
- if (!is64Bit)
- Ret += "-p:32:32";
-
- Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
- return Ret;
-}
-
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
@@ -54,18 +44,18 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
}
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool is64Bit)
- : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
- SmVersion(20), DL(computeDataLayout(is64Bit)),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL),
- FrameLowering(*this) {
-
- Triple T(TT);
-
- if (T.getOS() == Triple::NVCL)
- drvInterface = NVPTX::NVCL;
- else
- drvInterface = NVPTX::CUDA;
+ const std::string &FS,
+ const NVPTXTargetMachine &TM)
+ : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM),
+ InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)),
+ TSInfo(TM.getDataLayout()), FrameLowering() {}
+
+bool NVPTXSubtarget::hasImageHandles() const {
+ // Enable handles for Kepler+, where CUDA supports indirect surfaces and
+ // textures
+ if (TM.getDrvInterface() == NVPTX::CUDA)
+ return (SmVersion >= 30);
+
+ // Disabled, otherwise
+ return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index fb2d404..e9833e5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -32,8 +32,6 @@ namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
virtual void anchor();
std::string TargetName;
- NVPTX::DrvInterface drvInterface;
- bool Is64Bit;
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;
@@ -41,7 +39,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned int SmVersion;
- const DataLayout DL; // Calculates type size & alignment
+ const NVPTXTargetMachine &TM;
NVPTXInstrInfo InstrInfo;
NVPTXTargetLowering TLInfo;
TargetSelectionDAGInfo TSInfo;
@@ -55,13 +53,12 @@ public:
/// of the specified module.
///
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM, bool is64Bit);
+ const std::string &FS, const NVPTXTargetMachine &TM);
const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -95,20 +92,9 @@ public:
}
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
-
- bool hasImageHandles() const {
- // Enable handles for Kepler+, where CUDA supports indirect surfaces and
- // textures
- if (getDrvInterface() == NVPTX::CUDA)
- return (SmVersion >= 30);
-
- // Disabled, otherwise
- return false;
- }
- bool is64Bit() const { return Is64Bit; }
+ bool hasImageHandles() const;
unsigned int getSmVersion() const { return SmVersion; }
- NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
std::string getTargetName() const { return TargetName; }
unsigned getPTXVersion() const { return PTXVersion; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index c7f9507..ac27c30 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
+#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -24,12 +25,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
@@ -49,6 +50,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
@@ -63,20 +65,37 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
}
+static std::string computeDataLayout(bool is64Bit) {
+ std::string Ret = "e";
+
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+ return Ret;
+}
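// Illustrative note (not part of the patch): computeDataLayout yields
//   64-bit: "e-i64:64-v16:16-v32:32-n16:32:64"          (pointers default to 64-bit)
//   32-bit: "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"  (explicit 32-bit pointers)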
+
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
+ if (Triple(TT).getOS() == Triple::NVCL)
+ drvInterface = NVPTX::NVCL;
+ else
+ drvInterface = NVPTX::CUDA;
initAsmInfo();
}
@@ -124,12 +143,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return PassConfig;
}
-void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our NVPTX pass. This
- // allows the NVPTX pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createNVPTXTargetTransformInfoPass(this));
+TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis(
+ [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); });
}
void NVPTXPassConfig::addIRPasses() {
@@ -148,29 +164,27 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts and leaves
+ // them unused. We could remove dead code in an ad-hoc manner, but that
+ // requires manual work and might be error-prone.
+ addPass(createDeadCodeEliminationPass());
addPass(createSeparateConstOffsetFromGEPPass());
- // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
- // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
- // significantly better code than EarlyCSE for some of our benchmarks.
+  // ReassociateGEPs exposes more opportunities for SLSR. See
+ // the example in reassociate-geps-and-slsr.ll.
+ addPass(createStraightLineStrengthReducePass());
+  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
+ // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
+ // for some of our benchmarks.
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
else
addPass(createEarlyCSEPass());
- // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
- // some dead code. We could remove dead code in an ad-hoc manner, but that
- // requires manual work and might be error-prone.
- //
- // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
- // and leave them unused.
- //
- // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
- // old index and some of its intermediate results may become unused.
- addPass(createDeadCodeEliminationPass());
+ // Run NaryReassociate after EarlyCSE/GVN to be more effective.
+ addPass(createNaryReassociatePass());
}
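// Illustrative note (not part of the patch): the IR pipeline read off the
// addPass calls above is AssignValidGlobalNames -> GenericToNVVM ->
// FavorNonGenericAddrSpaces -> DCE -> SeparateConstOffsetFromGEP -> SLSR ->
// (GVN at the Aggressive level, else EarlyCSE) -> NaryReassociate. DCE runs
// right after FavorNonGenericAddrSpaces because that pass strands dead
// addrspacecasts, and SLSR runs before GVN/EarlyCSE so the common
// subexpressions it exposes can actually be reused.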
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index fa97ec8..2cd10e8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -25,7 +25,9 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
+ bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
@@ -37,9 +39,12 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
-
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
+ const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ bool is64Bit() const { return is64bit; }
+ NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
@@ -47,7 +52,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through MCJIT is not supported.
- bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &,
bool = true) override {
return true;
}
@@ -55,8 +60,7 @@ public:
return TLOF.get();
}
- /// \brief Register NVPTX analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
}; // NVPTXTargetMachine.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 00ceca5..5ecdc87 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,7 +41,6 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
- DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -83,24 +82,22 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- const MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override {
+ MCSection *getSectionForConstant(SectionKind Kind,
+ const Constant *C) const override {
return ReadOnlySection;
}
- const MCSection *getExplicitSectionGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override {
+ MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override {
return DataSection;
}
- const MCSection *
- SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b09d0d4..dc81802 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===//
+//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,19 +6,13 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// \file
-// This file implements a TargetTransformInfo analysis pass specific to the
-// NVPTX target machine. It uses the target's detailed information to provide
-// more precise answers to certain TTI queries, while letting the target
-// independent and default TTI implementations handle the rest.
-//
-//===----------------------------------------------------------------------===//
-#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetTransformInfo.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -26,69 +20,79 @@ using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeNVPTXTTIPass(PassRegistry &);
-}
-
-namespace {
-
-class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo {
- const NVPTXTargetLowering *TLI;
-public:
- NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- NVPTXTTI(const NVPTXTargetMachine *TM)
- : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) {
- initializeNVPTXTTIPass(*PassRegistry::getPassRegistry());
+// Whether the given intrinsic reads threadIdx.x/y/z.
+static bool readsThreadIndex(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ return true;
}
+}
- void initializePass() override { pushTTIStack(this); }
+static bool readsLaneId(const IntrinsicInst *II) {
+ return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ return true;
}
+}
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+ // Without inter-procedural analysis, we conservatively assume that arguments
+ // to __device__ functions are divergent.
+ if (const Argument *Arg = dyn_cast<Argument>(V))
+ return !isKernelFunction(*Arg->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Without pointer analysis, we conservatively assume values loaded from
+ // generic or local address space are divergent.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+ }
+ // Atomic instructions may cause divergence. Atomic instructions are
+ // executed sequentially across all threads in a warp. Therefore, an earlier
+ // executed thread may see different memory inputs than a later executed
+ // thread. For example, suppose *a = 0 initially.
+ //
+ // atom.global.add.s32 d, [a], 1
+ //
+ // returns 0 for the first thread that enters the critical region, and 1 for
+ // the second thread.
+ if (I->isAtomic())
+ return true;
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Instructions that read threadIdx are obviously divergent.
+ if (readsThreadIndex(II) || readsLaneId(II))
+ return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+ // atomic IR instruction.
+ if (isNVVMAtomic(II))
+ return true;
+ }
+ // Conservatively consider the return value of function calls as divergent.
+ // We could analyze callees with bodies more precisely using
+ // inter-procedural analysis.
+ if (isa<CallInst>(I))
+ return true;
}
- bool hasBranchDivergence() const override;
-
- unsigned getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue,
- OperandValueProperties Opd1PropInfo = OP_None,
- OperandValueProperties Opd2PropInfo = OP_None) const override;
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti",
- "NVPTX Target Transform Info", true, true, false)
-char NVPTXTTI::ID = 0;
-
-ImmutablePass *
-llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) {
- return new NVPTXTTI(TM);
+ return false;
}
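// Illustrative note (not part of the patch): the divergent sources
// recognized above are threadIdx/laneid intrinsic reads, atomic instructions
// and the NVVM atomic intrinsics, loads from the generic or local address
// space, call results, and arguments to non-kernel (__device__) functions.
// Propagating divergence from these sources to dependent values is left to
// the generic divergence analysis.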
-bool NVPTXTTI::hasBranchDivergence() const { return true; }
-
-unsigned NVPTXTTI::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo) const {
+unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
+ TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
@@ -96,8 +100,8 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
switch (ISD) {
default:
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
case ISD::ADD:
case ISD::MUL:
case ISD::XOR:
@@ -109,7 +113,7 @@ unsigned NVPTXTTI::getArithmeticInstrCost(
if (LT.second.SimpleTy == MVT::i64)
return 2 * LT.first;
// Delegate other cases to the basic TTI.
- return TargetTransformInfo::getArithmeticInstrCost(
- Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
}
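// Illustrative sketch (not part of the patch): querying the cost model
// above, assuming a TargetTransformInfo 'TTI' and LLVMContext 'Ctx' in
// scope. Per the switch, 64-bit ADD/MUL/logical/shift ops are modeled as
// roughly twice their 32-bit counterparts.
unsigned C64 = TTI.getArithmeticInstrCost(Instruction::Add,
                                          Type::getInt64Ty(Ctx)); // ~2
unsigned C32 = TTI.getArithmeticInstrCost(Instruction::Add,
                                          Type::getInt32Ty(Ctx)); // ~1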
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
new file mode 100644
index 0000000..4280888
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -0,0 +1,76 @@
+//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file describes a TargetTransformInfo::Concept conforming object
+/// specific to the
+/// NVPTX target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
+ typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const NVPTXSubtarget *ST;
+ const NVPTXTargetLowering *TLI;
+
+  const NVPTXSubtarget *getST() const { return ST; }
+  const NVPTXTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM)
+ : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ bool hasBranchDivergence() { return true; }
+
+ bool isSourceOfDivergence(const Value *V);
+
+ unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index cf1feac..1f178af 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
bool retval = llvm::findOneNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
- if (retval == false) {
+ if (!retval) {
// There is no NVVM metadata, check the calling convention
- if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
- return true;
- else
- return false;
+ return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
}
return (x == 1);
}
@@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
- if (retval == false)
+ if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index 85aa34e..a237247 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -735,19 +735,19 @@ def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst),
def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32);
}]>;
def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32);
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32);
}]>;
// The spurious call is here to silence a compiler warning about N being
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
deleted file mode 100644
index 5f074b3..0000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTXutil.h"
-#include "NVPTX.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-bool isParamLoad(const MachineInstr *MI) {
- if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
- (MI->getOpcode() != NVPTX::LD_i64_avar))
- return false;
- if (MI->getOperand(2).isImm() == false)
- return false;
- if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
- return false;
- return true;
-}
-
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
-
-static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
- char *a;
- char *end = space + splen;
-
- a = space;
- do {
- unsigned char uc;
-
- if (a >= end)
- return 1;
- uc = val & DATA_MASK;
- val >>= DIGIT_WIDTH;
- if (val != 0)
- uc |= MORE_BYTES;
- *a = uc;
- a++;
- } while (val);
- *nbytes = a - space;
- return 0;
-}
-
-#undef DATA_MASK
-#undef DIGIT_WIDTH
-#undef MORE_BYTES
-
-uint64_t encode_leb128(const char *str) {
- union {
- uint64_t x;
- char a[8];
- } temp64;
-
- temp64.x = 0;
-
- for (unsigned i = 0, e = strlen(str); i != e; ++i)
- temp64.a[i] = str[e - 1 - i];
-
- char encoded[16];
- int nbytes;
-
- int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
-
- (void) retval;
- assert(retval == 0 && "Encoding to leb128 failed");
-
- assert(nbytes <= 8 &&
- "Cannot support register names with leb128 encoding > 8 bytes");
-
- temp64.x = 0;
- for (int i = 0; i < nbytes; ++i)
- temp64.a[i] = encoded[i];
-
- return temp64.x;
-}
-
-} // end namespace llvm
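The deleted static helper implemented standard ULEB128: seven data bits per byte, with the high bit set while more bytes follow. A self-contained sketch of that encoding, independent of the removed code and checked against the classic 624485 -> e5 8e 26 example:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> encodeULEB128(uint64_t Val) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Val & 0x7f; // low seven data bits
    Val >>= 7;
    if (Val != 0)
      Byte |= 0x80; // continuation flag: more bytes follow
    Out.push_back(Byte);
  } while (Val != 0);
  return Out;
}

int main() {
  std::vector<uint8_t> E = encodeULEB128(624485);
  assert(E.size() == 3 && E[0] == 0xE5 && E[1] == 0x8E && E[2] == 0x26);
  for (uint8_t B : E)
    printf("%02x ", B);
  printf("\n");
  return 0;
}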
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
deleted file mode 100644
index 1915dac..0000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
-bool isParamLoad(const MachineInstr *);
-uint64_t encode_leb128(const char *str);
-}
-
-#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index a8d6b95..5e375b7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <map>
#include <sstream>
@@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.
+
+ // IR for __nvvm_reflect calls differs between CUDA versions:
+ // CUDA 6.5 and earlier uses this sequence:
+ // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
+ // (i8 addrspace(4)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ //
+ // Value returned by Sym->getOperand(0) is a Constant with a
+ // ConstantDataSequential operand which can be converted to string and used
+ // for lookup.
+ //
+ // CUDA 7.0 does it slightly differently:
+ // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
+ // (i8 addrspace(1)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ //
+ // In this case, we get a Constant with a GlobalVariable operand and we need
+ // to dig deeper to find its initializer with the string we'll use for lookup.
+
for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);
@@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
- const Constant *SymStr = cast<Constant>(Sym);
+ const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
+ // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's
+ // initializer.
+ assert(GV->hasInitializer() &&
+ "Format of _reflect function not recognized");
+ const Constant *Initializer = GV->getInitializer();
+ Operand = Initializer;
+ }
- assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ assert(isa<ConstantDataSequential>(Operand) &&
"Format of _reflect function not recognized");
-
- assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");
std::string ReflectArg =
- cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+ cast<ConstantDataSequential>(Operand)->getAsString();
ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
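The CUDA-7.0 path added above boils down to one extra step: when the reflect argument is a GlobalVariable rather than the string constant itself, chase its initializer before the ConstantDataSequential checks. A standalone sketch of that step, assuming LLVM headers from this era; the module and string contents are illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);

  // Stand-in for the @str global the __nvvm_reflect call points at.
  Constant *StrInit = ConstantDataArray::getString(Ctx, "texmode");
  GlobalVariable *GV =
      new GlobalVariable(M, StrInit->getType(), /*isConstant=*/true,
                         GlobalValue::InternalLinkage, StrInit, "str");

  // CUDA-7.0 pattern: the operand is the GlobalVariable, so dig to its
  // initializer, as the hunk does before the isCString() checks.
  const Value *Operand = GV;
  if (const GlobalVariable *G = dyn_cast<GlobalVariable>(Operand))
    Operand = G->getInitializer();

  if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(Operand))
    if (CDS->isCString())
      outs() << "reflect arg: " << CDS->getAsCString() << "\n";
  return 0;
}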