author     dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
committer  dim <dim@FreeBSD.org>  2015-05-27 20:26:41 +0000
commit     5ef8fd3549d38e883a31881636be3dc2a275de20 (patch)
tree       bd13a22d9db57ccf3eddbc07b32c18109521d050 /contrib/llvm/lib/Target/NVPTX
parent     77794ebe2d5718eb502c93ec32f8ccae4d8a0b7b (diff)
parent     782067d0278612ee75d024b9b135c221c327e9e8 (diff)
Merge llvm trunk r238337 from ^/vendor/llvm/dist, resolve conflicts, and
preserve our customizations, where necessary.
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
45 files changed, 1232 insertions, 945 deletions
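Most of the churn below tracks upstream LLVM API migrations rather than NVPTX-specific behavior: MCInstPrinter::printInst now receives the MCSubtargetInfo per call instead of caching feature bits at construction, OutStreamer became a pointer, and the duplicated per-target registration calls in LLVMInitializeNVPTXTargetMC() collapse into a loop over both NVPTX targets. Below is a minimal standalone sketch of that de-duplication pattern; the Target struct and names here are illustrative stand-ins, not the real LLVM API.

    #include <cstdio>
    #include <initializer_list>

    // Stand-in for llvm::Target; the real registration functions take a
    // Target reference plus a factory callback for each MC component.
    struct Target { const char *Name; };

    static Target TheTarget32{"nvptx"};
    static Target TheTarget64{"nvptx64"};

    static void registerMCComponents(Target &T) {
      // One registration body shared by both targets, mirroring the new
      // loop in LLVMInitializeNVPTXTargetMC().
      std::printf("registering MC components for %s\n", T.Name);
    }

    int main() {
      for (Target *T : {&TheTarget32, &TheTarget64})
        registerMCComponents(*T);
      return 0;
    }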
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 80b2f62..ac92df9 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -28,13 +28,9 @@ using namespace llvm;
 #include "NVPTXGenAsmWriter.inc"
-
 NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                                   const MCRegisterInfo &MRI,
-                                   const MCSubtargetInfo &STI)
-    : MCInstPrinter(MAI, MII, MRI) {
-  setAvailableFeatures(STI.getFeatureBits());
-}
+                                   const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
 
 void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
   // Decode the virtual register
@@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
 }
 
 void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
-                                 StringRef Annot) {
+                                 StringRef Annot, const MCSubtargetInfo &STI) {
   printInstruction(MI, OS);
 
   // Next always print the annotation.
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 0496964..02c5a21 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -25,10 +25,11 @@ class MCSubtargetInfo;
 class NVPTXInstPrinter : public MCInstPrinter {
 public:
   NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
-                   const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+                   const MCRegisterInfo &MRI);
 
   void printRegName(raw_ostream &OS, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+  void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
 
   // Autogenerated by tblgen.
   void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
   InlineAsmEnd = " inline asm";
 
   SupportsDebugInformation = CompileForDebugging;
+  // PTX does not allow .align on functions.
+  HasFunctionAlignment = false;
   HasDotTypeDotSizeDirective = false;
 
   Data8bitsDirective = " .b8 ";
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..d500105 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -54,52 +54,39 @@ createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
 static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
     StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
-  X->InitMCCodeGenInfo(RM, CM, OL);
+  X->initMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
-static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
+static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
                                                unsigned SyntaxVariant,
                                                const MCAsmInfo &MAI,
                                                const MCInstrInfo &MII,
-                                               const MCRegisterInfo &MRI,
-                                               const MCSubtargetInfo &STI) {
+                                               const MCRegisterInfo &MRI) {
   if (SyntaxVariant == 0)
-    return new NVPTXInstPrinter(MAI, MII, MRI, STI);
+    return new NVPTXInstPrinter(MAI, MII, MRI);
   return nullptr;
 }
 
 // Force static initialization.
 extern "C" void LLVMInitializeNVPTXTargetMC() {
-  // Register the MC asm info.
-  RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
-  RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
-  // Register the MC codegen info.
-  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
-                                        createNVPTXMCCodeGenInfo);
-  TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
-                                        createNVPTXMCCodeGenInfo);
-
-  // Register the MC instruction info.
-  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
-  TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
-  // Register the MC register info.
-  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
-                                    createNVPTXMCRegisterInfo);
-  TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
-                                    createNVPTXMCRegisterInfo);
-
-  // Register the MC subtarget info.
-  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
-                                          createNVPTXMCSubtargetInfo);
-  TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
-                                          createNVPTXMCSubtargetInfo);
-
-  // Register the MCInstPrinter.
-  TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
-                                        createNVPTXMCInstPrinter);
-  TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
-                                        createNVPTXMCInstPrinter);
+  for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+    // Register the MC asm info.
+    RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+    // Register the MC codegen info.
+    TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+    // Register the MC instruction info.
+    TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+    // Register the MC register info.
+    TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+    // Register the MC subtarget info.
+    TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+    // Register the MCInstPrinter.
+    TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+  }
 }
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
 #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
 
+#include <stdint.h>
+
 namespace llvm {
 class Target;
 
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index a3382eb..382525d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -59,7 +59,6 @@ inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
   llvm_unreachable("Unknown condition code");
 }
 
-ImmutablePass *createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM);
 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 93fabf6..96abfa8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
                             "Target SM 2.1">;
 def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
                             "Target SM 3.0">;
+def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
+                            "Target SM 3.2">;
 def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
                             "Target SM 3.5">;
+def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
+                            "Target SM 3.7">;
 def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
                             "Target SM 5.0">;
+def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
+                            "Target SM 5.2">;
+def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
+                            "Target SM 5.3">;
 
 // PTX Versions
-def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
-                             "Use PTX version 3.0">;
-def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
-                             "Use PTX version 3.1">;
 def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
                              "Use PTX version 3.2">;
 def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
                              "Use PTX version 4.0">;
+def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
+                             "Use PTX version 4.1">;
+def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
+                             "Use PTX version 4.2">;
 
 //===----------------------------------------------------------------------===//
 // NVPTX supported processors.
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features>
 def : Proc<"sm_20", [SM20]>;
 def : Proc<"sm_21", [SM21]>;
 def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_32", [SM32, PTX40]>;
 def : Proc<"sm_35", [SM35]>;
-def : Proc<"sm_50", [SM50]>;
+def : Proc<"sm_37", [SM37, PTX41]>;
+def : Proc<"sm_50", [SM50, PTX40]>;
+def : Proc<"sm_52", [SM52, PTX41]>;
+def : Proc<"sm_53", [SM53, PTX42]>;
 
 def NVPTXInstrInfo : InstrInfo {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
+using namespace llvm;
 
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+  static char ID; // Pass ID
+  NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addPreserved<MachineFunctionAnalysis>();
+    AU.addPreserved<StackProtector>();
+  }
+
+  const char *getPassName() const override {
+    return "NVPTX specific alloca hoisting";
+  }
+
+  bool runOnFunction(Function &function) override;
+};
+} // namespace
 
 bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
   bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
   return functionModified;
 }
 
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
-                     "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
 
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+    NVPTXAllocaHoisting, "alloca-hoisting",
+    "Hoisting alloca instructions in non-entry blocks to the entry block",
+    false, false)
 
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
 
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
 namespace llvm {
-
 class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
-  static char ID; // Pass ID
-  NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<DataLayoutPass>();
-    AU.addPreserved<MachineFunctionAnalysis>();
-    AU.addPreserved<StackProtector>();
-  }
-
-  const char *getPassName() const override {
-    return "NVPTX specific alloca hoisting";
-  }
-
-  bool runOnFunction(Function &function) override;
-};
 
 extern FunctionPass *createAllocaHoisting();
-
 } // end namespace llvm
 
 #endif
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index beec9b2..3bbea40 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/DebugInfo.h"
@@ -36,6 +37,7 @@
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
@@ -45,6 +47,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TimeValue.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <sstream>
 
 using namespace llvm;
@@ -116,7 +119,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
 
   DebugLoc curLoc = MI.getDebugLoc();
 
-  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+  if (!prevDebugLoc && !curLoc)
     return;
 
   if (prevDebugLoc == curLoc)
@@ -124,50 +127,43 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
 
   prevDebugLoc = curLoc;
 
-  if (curLoc.isUnknown())
+  if (!curLoc)
    return;
 
-  const MachineFunction *MF = MI.getParent()->getParent();
-  //const TargetMachine &TM = MF->getTarget();
-
-  const LLVMContext &ctx = MF->getFunction()->getContext();
-  DIScope Scope(curLoc.getScope(ctx));
-
-  assert((!Scope || Scope.isScope()) &&
-         "Scope of a DebugLoc should be null or a DIScope.");
+  auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
   if (!Scope)
     return;
 
-  StringRef fileName(Scope.getFilename());
-  StringRef dirName(Scope.getDirectory());
+  StringRef fileName(Scope->getFilename());
+  StringRef dirName(Scope->getDirectory());
   SmallString<128> FullPathName = dirName;
   if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
     sys::path::append(FullPathName, fileName);
-    fileName = FullPathName.str();
+    fileName = FullPathName;
   }
 
-  if (filenameMap.find(fileName.str()) == filenameMap.end())
+  if (filenameMap.find(fileName) == filenameMap.end())
    return;
 
   // Emit the line from the source file.
   if (InterleaveSrc)
-    this->emitSrcInText(fileName.str(), curLoc.getLine());
+    this->emitSrcInText(fileName, curLoc.getLine());
 
   std::stringstream temp;
-  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+  temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
        << " " << curLoc.getCol();
-  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+  OutStreamer->EmitRawText(temp.str());
 }
 
 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
     emitLineNumberAsDotLoc(*MI);
 
   MCInst Inst;
   lowerToMCInst(MI, Inst);
-  EmitToStreamer(OutStreamer, Inst);
+  EmitToStreamer(*OutStreamer, Inst);
 }
 
 // Handle symbol backtracking for targets that do not support image handles
@@ -229,19 +225,17 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
   const char *Sym = MFI->getImageHandleSymbol(Index);
   std::string *SymNamePtr = nvTM.getManagedStrPool()->getManagedString(Sym);
-  MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
+  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
     StringRef(SymNamePtr->c_str())));
 }
 
 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
   OutMI.setOpcode(MI->getOpcode());
-  const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
-
   // Special: Do not mangle symbol operand of CALL_PROTOTYPE
   if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
     const MachineOperand &MO = MI->getOperand(0);
     OutMI.addOperand(GetSymbolRef(
-      OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
+      OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
     return;
   }
 
@@ -249,7 +243,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
     const MachineOperand &MO = MI->getOperand(i);
 
     MCOperand MCOp;
-    if (!ST.hasImageHandles()) {
+    if (!nvptxSubtarget->hasImageHandles()) {
       if (lowerImageHandleOperand(MI, i, MCOp)) {
         OutMI.addOperand(MCOp);
         continue;
@@ -266,13 +260,13 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
   switch (MO.getType()) {
   default: llvm_unreachable("unknown operand type");
   case MachineOperand::MO_Register:
-    MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
+    MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
     break;
   case MachineOperand::MO_Immediate:
-    MCOp = MCOperand::CreateImm(MO.getImm());
+    MCOp = MCOperand::createImm(MO.getImm());
     break;
   case MachineOperand::MO_MachineBasicBlock:
-    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+    MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
         MO.getMBB()->getSymbol(), OutContext));
     break;
   case MachineOperand::MO_ExternalSymbol:
@@ -288,11 +282,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
     switch (Cnt->getType()->getTypeID()) {
     default: report_fatal_error("Unsupported FP type"); break;
     case Type::FloatTyID:
-      MCOp = MCOperand::CreateExpr(
+      MCOp = MCOperand::createExpr(
         NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
       break;
     case Type::DoubleTyID:
-      MCOp = MCOperand::CreateExpr(
+      MCOp = MCOperand::createExpr(
        NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
       break;
     }
@@ -342,16 +336,16 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
   const MCExpr *Expr;
   Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
-  return MCOperand::CreateExpr(Expr);
+  return MCOperand::createExpr(Expr);
 }
 
 void
 NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+  const DataLayout *TD = TM.getDataLayout();
+  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
 
   Type *Ty = F->getReturnType();
 
-  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
 
   if (Ty->getTypeID() == Type::VoidTyID)
     return;
@@ -418,6 +412,42 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
   printReturnValStr(F, O);
 }
 
+// Return true if MBB is the header of a loop marked with
+// llvm.loop.unroll.disable.
+// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
+bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
+    const MachineBasicBlock &MBB) const {
+  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
+  // TODO: isLoopHeader() should take "const MachineBasicBlock *".
+  // We insert .pragma "nounroll" only to the loop header.
+  if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+    return false;
+
+  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
+  // we iterate through each back edge of the loop with header MBB, and check
+  // whether its metadata contains llvm.loop.unroll.disable.
+  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
+    const MachineBasicBlock *PMBB = *I;
+    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
+      // Edges from other loops to MBB are not back edges.
+      continue;
+    }
+    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
+      if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
+        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+  AsmPrinter::EmitBasicBlockStart(MBB);
+  if (isLoopHeaderOfNoUnroll(MBB))
+    OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
+}
+
 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
@@ -445,39 +475,37 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   if (llvm::isKernelFunction(*F))
     emitKernelFunctionDirectives(*F, O);
 
-  OutStreamer.EmitRawText(O.str());
+  OutStreamer->EmitRawText(O.str());
 
   prevDebugLoc = DebugLoc();
 }
 
 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
   VRegMapping.clear();
-  OutStreamer.EmitRawText(StringRef("{\n"));
+  OutStreamer->EmitRawText(StringRef("{\n"));
   setAndEmitFunctionVirtualRegisters(*MF);
 
   SmallString<128> Str;
   raw_svector_ostream O(Str);
   emitDemotedVars(MF->getFunction(), O);
-  OutStreamer.EmitRawText(O.str());
+  OutStreamer->EmitRawText(O.str());
 }
 
 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
-  OutStreamer.EmitRawText(StringRef("}\n"));
+  OutStreamer->EmitRawText(StringRef("}\n"));
   VRegMapping.clear();
 }
 
 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
   unsigned RegNo = MI->getOperand(0).getReg();
-  const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
-  if (TRI->isVirtualRegister(RegNo)) {
-    OutStreamer.AddComment(Twine("implicit-def: ") +
-                           getVirtualRegisterName(RegNo));
+  if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
+    OutStreamer->AddComment(Twine("implicit-def: ") +
+                            getVirtualRegisterName(RegNo));
   } else {
-    OutStreamer.AddComment(
-        Twine("implicit-def: ") +
-        TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
+    OutStreamer->AddComment(Twine("implicit-def: ") +
+                            nvptxSubtarget->getRegisterInfo()->getName(RegNo));
   }
-  OutStreamer.AddBlankLine();
+  OutStreamer->AddBlankLine();
 }
 
 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
@@ -487,15 +515,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
   // If none of reqntid* is specified, don't output reqntid directive.
   unsigned reqntidx, reqntidy, reqntidz;
   bool specified = false;
-  if (llvm::getReqNTIDx(F, reqntidx) == false)
+  if (!llvm::getReqNTIDx(F, reqntidx))
     reqntidx = 1;
   else
     specified = true;
-  if (llvm::getReqNTIDy(F, reqntidy) == false)
+  if (!llvm::getReqNTIDy(F, reqntidy))
     reqntidy = 1;
   else
     specified = true;
-  if (llvm::getReqNTIDz(F, reqntidz) == false)
+  if (!llvm::getReqNTIDz(F, reqntidz))
     reqntidz = 1;
   else
     specified = true;
@@ -509,15 +537,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
   // If none of maxntid* is specified, don't output maxntid directive.
   unsigned maxntidx, maxntidy, maxntidz;
   specified = false;
-  if (llvm::getMaxNTIDx(F, maxntidx) == false)
+  if (!llvm::getMaxNTIDx(F, maxntidx))
     maxntidx = 1;
   else
     specified = true;
-  if (llvm::getMaxNTIDy(F, maxntidy) == false)
+  if (!llvm::getMaxNTIDy(F, maxntidy))
     maxntidy = 1;
   else
     specified = true;
-  if (llvm::getMaxNTIDz(F, maxntidz) == false)
+  if (!llvm::getMaxNTIDz(F, maxntidz))
     maxntidz = 1;
   else
     specified = true;
@@ -607,7 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
     return false;
 
   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-    if (GV->getName().str() == "llvm.used")
+    if (GV->getName() == "llvm.used")
       return false;
     return true;
   }
@@ -622,7 +650,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
 
 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
-    if (othergv->getName().str() == "llvm.used")
+    if (othergv->getName() == "llvm.used")
       return true;
   }
 
@@ -638,7 +666,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
   }
 
   for (const User *UU : U->users())
-    if (usedInOneFunc(UU, oneFunc) == false)
+    if (!usedInOneFunc(UU, oneFunc))
       return false;
 
   return true;
@@ -652,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
  * 3. Is the global variable referenced only in one function?
  */
 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
-  if (gv->hasInternalLinkage() == false)
+  if (!gv->hasInternalLinkage())
     return false;
   const PointerType *Pty = gv->getType();
   if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -661,7 +689,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
 
   const Function *oneFunc = nullptr;
 
   bool flag = usedInOneFunc(gv, oneFunc);
-  if (flag == false)
+  if (!flag)
     return false;
 
   if (!oneFunc)
     return false;
@@ -746,37 +774,45 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   DbgFinder.processModule(M);
 
   unsigned i = 1;
-  for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
-    StringRef Filename(DIUnit.getFilename());
-    StringRef Dirname(DIUnit.getDirectory());
+  for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
+    StringRef Filename = DIUnit->getFilename();
+    StringRef Dirname = DIUnit->getDirectory();
     SmallString<128> FullPathName = Dirname;
     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
       sys::path::append(FullPathName, Filename);
-      Filename = FullPathName.str();
+      Filename = FullPathName;
     }
-    if (filenameMap.find(Filename.str()) != filenameMap.end())
+    if (filenameMap.find(Filename) != filenameMap.end())
       continue;
-    filenameMap[Filename.str()] = i;
-    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+    filenameMap[Filename] = i;
+    OutStreamer->EmitDwarfFileDirective(i, "", Filename);
     ++i;
   }
 
-  for (DISubprogram SP : DbgFinder.subprograms()) {
-    StringRef Filename(SP.getFilename());
-    StringRef Dirname(SP.getDirectory());
+  for (DISubprogram *SP : DbgFinder.subprograms()) {
+    StringRef Filename = SP->getFilename();
+    StringRef Dirname = SP->getDirectory();
     SmallString<128> FullPathName = Dirname;
     if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
       sys::path::append(FullPathName, Filename);
-      Filename = FullPathName.str();
+      Filename = FullPathName;
     }
-    if (filenameMap.find(Filename.str()) != filenameMap.end())
+    if (filenameMap.find(Filename) != filenameMap.end())
       continue;
-    filenameMap[Filename.str()] = i;
+    filenameMap[Filename] = i;
     ++i;
   }
 }
 
 bool NVPTXAsmPrinter::doInitialization(Module &M) {
+  // Construct a default subtarget off of the TargetMachine defaults. The
+  // rest of NVPTX isn't friendly to change subtargets per function and
+  // so the default TargetMachine will have all of the options.
+  StringRef TT = TM.getTargetTriple();
+  StringRef CPU = TM.getTargetCPU();
+  StringRef FS = TM.getTargetFeatureString();
+  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+  const NVPTXSubtarget STI(TT, CPU, FS, NTM);
 
   SmallString<128> Str1;
   raw_svector_ostream OS1(Str1);
@@ -791,26 +827,27 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
       .Initialize(OutContext, TM);
 
-  Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
+  Mang = new Mangler(TM.getDataLayout());
 
   // Emit header before any dwarf directives are emitted below.
-  emitHeader(M, OS1);
-  OutStreamer.EmitRawText(OS1.str());
+  emitHeader(M, OS1, STI);
+  OutStreamer->EmitRawText(OS1.str());
 
   // Already commented out
   //bool Result = AsmPrinter::doInitialization(M);
 
   // Emit module-level inline asm if it exists.
   if (!M.getModuleInlineAsm().empty()) {
-    OutStreamer.AddComment("Start of file scope inline assembly");
-    OutStreamer.AddBlankLine();
-    OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
-    OutStreamer.AddBlankLine();
-    OutStreamer.AddComment("End of file scope inline assembly");
-    OutStreamer.AddBlankLine();
+    OutStreamer->AddComment("Start of file scope inline assembly");
+    OutStreamer->AddBlankLine();
+    OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
+    OutStreamer->AddBlankLine();
+    OutStreamer->AddComment("End of file scope inline assembly");
+    OutStreamer->AddBlankLine();
   }
 
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+  // If we're not NVCL we're CUDA, go ahead and emit filenames.
+  if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
     recordAndEmitFilenames(M);
 
   GlobalsEmitted = false;
@@ -848,25 +885,27 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
 
   OS2 << '\n';
 
-  OutStreamer.EmitRawText(OS2.str());
+  OutStreamer->EmitRawText(OS2.str());
 }
 
-void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
+                                 const NVPTXSubtarget &STI) {
   O << "//\n";
   O << "// Generated by LLVM NVPTX Back-End\n";
   O << "//\n";
   O << "\n";
 
-  unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
+  unsigned PTXVersion = STI.getPTXVersion();
   O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
 
   O << ".target ";
-  O << nvptxSubtarget.getTargetName();
+  O << STI.getTargetName();
 
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+  if (NTM.getDrvInterface() == NVPTX::NVCL)
     O << ", texmode_independent";
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
-    if (!nvptxSubtarget.hasDouble())
+  else {
+    if (!STI.hasDouble())
       O << ", map_f64_to_f32";
   }
 
@@ -876,7 +915,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
   O << "\n";
 
   O << ".address_size ";
-  if (nvptxSubtarget.is64Bit())
+  if (NTM.is64Bit())
     O << "64";
   else
     O << "32";
@@ -886,7 +925,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
 }
 
 bool NVPTXAsmPrinter::doFinalization(Module &M) {
-
   // If we did not emit any functions, then the global declarations have not
   // yet been emitted.
   if (!GlobalsEmitted) {
@@ -948,7 +986,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
 
 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
                                            raw_ostream &O) {
-  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
     if (V->hasExternalLinkage()) {
       if (isa<GlobalVariable>(V)) {
         const GlobalVariable *GVar = cast<GlobalVariable>(V);
@@ -967,7 +1005,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
       msg.append("Error: ");
       msg.append("Symbol ");
       if (V->hasName())
-        msg.append(V->getName().str());
+        msg.append(V->getName());
       msg.append("has unsupported appending linkage type");
       llvm_unreachable(msg.c_str());
     } else if (!V->hasInternalLinkage() &&
@@ -992,7 +1030,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       GVar->getName().startswith("nvvm."))
     return;
 
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
 
   // GlobalVariables are always constant pointers themselves.
   const PointerType *PTy = GVar->getType();
@@ -1103,7 +1141,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
 
   const Function *demotedFunc = nullptr;
   if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
-    O << "// " << GVar->getName().str() << " has been demoted\n";
+    O << "// " << GVar->getName() << " has been demoted\n";
     if (localDecls.find(demotedFunc) != localDecls.end())
       localDecls[demotedFunc].push_back(GVar);
     else {
@@ -1142,7 +1180,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
     if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
       const Constant *Initializer = GVar->getInitializer();
-      // 'undef' is treated as there is no value spefied.
+      // 'undef' is treated as there is no value specified.
       if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
         O << " = ";
         printScalarConstant(Initializer, O);
@@ -1151,9 +1189,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
       // The frontend adds zero-initializer to variables that don't have an
       // initial value, so skip warning for this case.
       if (!GVar->getInitializer()->isNullValue()) {
-        std::string warnMsg = "initial value of '" + GVar->getName().str() +
-                              "' is not allowed in addrspace(" +
-                              llvm::utostr_32(PTy->getAddressSpace()) + ")";
+        std::string warnMsg =
+            ("initial value of '" + GVar->getName() +
+             "' is not allowed in addrspace(" +
+             Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
         report_fatal_error(warnMsg.c_str());
       }
     }
@@ -1180,7 +1219,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
         AggBuffer aggBuffer(ElementSize, O, *this);
         bufferAggregateConstant(Initializer, &aggBuffer);
         if (aggBuffer.numSymbols) {
-          if (nvptxSubtarget.is64Bit()) {
+          if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
             O << " .u64 " << *getSymbol(GVar) << "[";
             O << ElementSize / 8;
           } else {
@@ -1278,7 +1317,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   case Type::DoubleTyID:
     return "f64";
   case Type::PointerTyID:
-    if (nvptxSubtarget.is64Bit())
+    if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
       if (useB4PTR)
         return "b64";
       else
@@ -1295,7 +1334,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
 
 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
                                             raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
 
   // GlobalVariables are always constant pointers themselves.
   const PointerType *PTy = GVar->getType();
@@ -1369,50 +1408,22 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
 
 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
                                      int paramIndex, raw_ostream &O) {
-  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
-      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
-    O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
-  else {
-    std::string argName = I->getName();
-    const char *p = argName.c_str();
-    while (*p) {
-      if (*p == '.')
-        O << "_";
-      else
-        O << *p;
-      p++;
-    }
-  }
+  O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
 }
 
 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
-  Function::const_arg_iterator I, E;
-  int i = 0;
-
-  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
-      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
-    O << *CurrentFnSym << "_param_" << paramIndex;
-    return;
-  }
-
-  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
-    if (i == paramIndex) {
-      printParamName(I, paramIndex, O);
-      return;
-    }
-  }
-  llvm_unreachable("paramIndex out of bound");
+  O << *CurrentFnSym << "_param_" << paramIndex;
 }
 
 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
   const AttributeSet &PAL = F->getAttributes();
-  const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
   Function::const_arg_iterator I, E;
   unsigned paramIndex = 0;
   bool first = true;
   bool isKernelFunc = llvm::isKernelFunction(*F);
-  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
   MVT thePointerTy = TLI->getPointerTy();
 
   O << "(\n";
@@ -1431,21 +1442,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
     if (isImage(*I)) {
       std::string sname = I->getName();
       if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
-        if (nvptxSubtarget.hasImageHandles())
+        if (nvptxSubtarget->hasImageHandles())
           O << "\t.param .u64 .ptr .surfref ";
         else
           O << "\t.param .surfref ";
         O << *CurrentFnSym << "_param_" << paramIndex;
       } else { // Default image is read_only
-        if (nvptxSubtarget.hasImageHandles())
+        if (nvptxSubtarget->hasImageHandles())
          O << "\t.param .u64 .ptr .texref ";
         else
          O << "\t.param .texref ";
        O << *CurrentFnSym << "_param_" << paramIndex;
       }
     } else {
-      if (nvptxSubtarget.hasImageHandles())
+      if (nvptxSubtarget->hasImageHandles())
         O << "\t.param .u64 .ptr .samplerref ";
       else
        O << "\t.param .samplerref ";
@@ -1455,7 +1466,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
       }
     }
 
-    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+    if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
       if (Ty->isAggregateType() || Ty->isVectorTy()) {
         // Just print .param .align <a> .b8 .param[size];
         // <a> = PAL.getparamalignment
@@ -1478,7 +1489,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
           // Special handling for pointer arguments to kernel
           O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
 
-          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
+          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
+              NVPTX::CUDA) {
            Type *ETy = PTy->getElementType();
            int addrSpace = PTy->getAddressSpace();
            switch (addrSpace) {
@@ -1607,7 +1619,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
   if (NumBytes) {
     O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
       << getFunctionNumber() << "[" << NumBytes << "];\n";
-    if (nvptxSubtarget.is64Bit()) {
+    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
       O << "\t.reg .b64 \t%SP;\n";
       O << "\t.reg .b64 \t%SPL;\n";
     } else {
@@ -1655,7 +1667,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
     }
   }
 
-  OutStreamer.EmitRawText(O.str());
+  OutStreamer->EmitRawText(O.str());
 }
 
 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
@@ -1738,7 +1750,7 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
 
 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
                                    AggBuffer *aggBuffer) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
 
   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
     int s = TD->getTypeAllocSize(CPV->getType());
@@ -1754,12 +1766,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   case Type::IntegerTyID: {
     const Type *ETy = CPV->getType();
     if (ETy == Type::getInt8Ty(CPV->getContext())) {
-      unsigned char c =
-          (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
       ptr = &c;
       aggBuffer->addBytes(ptr, 1, Bytes);
     } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
-      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
       ptr = (unsigned char *)&int16;
       aggBuffer->addBytes(ptr, 2, Bytes);
     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
@@ -1770,7 +1781,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
         break;
       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
-                ConstantFoldConstantExpression(Cexpr, TD))) {
+                ConstantFoldConstantExpression(Cexpr, *TD))) {
           int int32 = (int)(constInt->getZExtValue());
           ptr = (unsigned char *)&int32;
           aggBuffer->addBytes(ptr, 4, Bytes);
           break;
         }
         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
-          aggBuffer->addSymbol(v);
+          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
           aggBuffer->addZeros(4);
           break;
         }
@@ -1792,7 +1803,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
         break;
       } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
         if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
-                ConstantFoldConstantExpression(Cexpr, TD))) {
+                ConstantFoldConstantExpression(Cexpr, *TD))) {
           long long int64 = (long long)(constInt->getZExtValue());
           ptr = (unsigned char *)&int64;
           aggBuffer->addBytes(ptr, 8, Bytes);
           break;
         }
         if (Cexpr->getOpcode() == Instruction::PtrToInt) {
           Value *v = Cexpr->getOperand(0)->stripPointerCasts();
-          aggBuffer->addSymbol(v);
+          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
           aggBuffer->addZeros(8);
           break;
         }
@@ -1829,10 +1840,10 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
   }
   case Type::PointerTyID: {
     if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
-      aggBuffer->addSymbol(GVar);
+      aggBuffer->addSymbol(GVar, GVar);
     } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
       const Value *v = Cexpr->stripPointerCasts();
-      aggBuffer->addSymbol(v);
+      aggBuffer->addSymbol(v, Cexpr);
     }
     unsigned int s = TD->getTypeAllocSize(CPV->getType());
     aggBuffer->addZeros(s);
@@ -1862,7 +1873,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
 
 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                               AggBuffer *aggBuffer) {
-  const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout();
+  const DataLayout *TD = TM.getDataLayout();
   int Bytes;
 
   // Old constants
@@ -1973,6 +1984,212 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
   return false;
 }
 
+/// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
+/// a copy from AsmPrinter::lowerConstant, except customized to only handle
+/// expressions that are representable in PTX and create
+/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
+const MCExpr *
+NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
+  MCContext &Ctx = OutContext;
+
+  if (CV->isNullValue() || isa<UndefValue>(CV))
+    return MCConstantExpr::Create(0, Ctx);
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    const MCSymbolRefExpr *Expr =
+      MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+    if (ProcessingGeneric) {
+      return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx);
+    } else {
+      return Expr;
+    }
+  }
+
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+  if (!CE) {
+    llvm_unreachable("Unknown constant value to lower!");
+  }
+
+  switch (CE->getOpcode()) {
+  default:
+    // If the code isn't optimized, there may be outstanding folding
+    // opportunities. Attempt to fold the expression using DataLayout as a
+    // last resort before giving up.
+    if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+      if (C != CE)
+        return lowerConstantForGV(C, ProcessingGeneric);
+
+    // Otherwise report the problem to the user.
+    {
+      std::string S;
+      raw_string_ostream OS(S);
+      OS << "Unsupported expression in static initializer: ";
+      CE->printAsOperand(OS, /*PrintType=*/false,
+                         !MF ? nullptr : MF->getFunction()->getParent());
+      report_fatal_error(OS.str());
+    }
+
+  case Instruction::AddrSpaceCast: {
+    // Strip the addrspacecast and pass along the operand
+    PointerType *DstTy = cast<PointerType>(CE->getType());
+    if (DstTy->getAddressSpace() == 0) {
+      return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
+    }
+    std::string S;
+    raw_string_ostream OS(S);
+    OS << "Unsupported expression in static initializer: ";
+    CE->printAsOperand(OS, /*PrintType=*/ false,
+                       !MF ? 0 : MF->getFunction()->getParent());
+    report_fatal_error(OS.str());
+  }
+
+  case Instruction::GetElementPtr: {
+    const DataLayout &DL = *TM.getDataLayout();
+
+    // Generate a symbolic expression for the byte address
+    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
+    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
+
+    const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
+                                            ProcessingGeneric);
+    if (!OffsetAI)
+      return Base;
+
+    int64_t Offset = OffsetAI.getSExtValue();
+    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+                                   Ctx);
+  }
+
+  case Instruction::Trunc:
+    // We emit the value and depend on the assembler to truncate the generated
+    // expression properly. This is important for differences between
+    // blockaddress labels. Since the two labels are in the same function, it
+    // is reasonable to treat their delta as a 32-bit value.
+    // FALL THROUGH.
+  case Instruction::BitCast:
+    return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+
+  case Instruction::IntToPtr: {
+    const DataLayout &DL = *TM.getDataLayout();
+
+    // Handle casts to pointers by changing them into casts to the appropriate
+    // integer type. This promotes constant folding and simplifies this code.
+    Constant *Op = CE->getOperand(0);
+    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
+                                      false/*ZExt*/);
+    return lowerConstantForGV(Op, ProcessingGeneric);
+  }
+
+  case Instruction::PtrToInt: {
+    const DataLayout &DL = *TM.getDataLayout();
+
+    // Support only foldable casts to/from pointers that can be eliminated by
+    // changing the pointer to the appropriately sized integer type.
+    Constant *Op = CE->getOperand(0);
+    Type *Ty = CE->getType();
+
+    const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot equal to the size of the pointer.
+    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+      return OpExpr;
+
+    // Otherwise the pointer is smaller than the resultant integer, mask off
+    // the high bits so we are sure to get a proper truncation if the input is
+    // a constant expr.
+    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
+    const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+  }
+
+  // The MC library also has a right-shift operator, but it isn't consistently
+  // signed or unsigned between different targets.
+  case Instruction::Add: {
+    const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
+    const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
+    switch (CE->getOpcode()) {
+    default: llvm_unreachable("Unknown binary operator constant cast expr");
+    case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+    }
+  }
+  }
+}
+
+// Copy of MCExpr::print customized for NVPTX
+void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
+  switch (Expr.getKind()) {
+  case MCExpr::Target:
+    return cast<MCTargetExpr>(&Expr)->PrintImpl(OS);
+  case MCExpr::Constant:
+    OS << cast<MCConstantExpr>(Expr).getValue();
+    return;
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
+    const MCSymbol &Sym = SRE.getSymbol();
+    OS << Sym;
+    return;
+  }
+
+  case MCExpr::Unary: {
+    const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
+    switch (UE.getOpcode()) {
+    case MCUnaryExpr::LNot:  OS << '!'; break;
+    case MCUnaryExpr::Minus: OS << '-'; break;
+    case MCUnaryExpr::Not:   OS << '~'; break;
+    case MCUnaryExpr::Plus:  OS << '+'; break;
+    }
+    printMCExpr(*UE.getSubExpr(), OS);
+    return;
+  }
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
+
+    // Only print parens around the LHS if it is non-trivial.
+    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
+        isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
+      printMCExpr(*BE.getLHS(), OS);
+    } else {
+      OS << '(';
+      printMCExpr(*BE.getLHS(), OS);
+      OS << ')';
+    }
+
+    switch (BE.getOpcode()) {
+    case MCBinaryExpr::Add:
+      // Print "X-42" instead of "X+-42".
+      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+        if (RHSC->getValue() < 0) {
+          OS << RHSC->getValue();
+          return;
+        }
+      }
+
+      OS << '+';
+      break;
+    default: llvm_unreachable("Unhandled binary operator");
+    }
+
+    // Only print parens around the LHS if it is non-trivial.
+    if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+      printMCExpr(*BE.getRHS(), OS);
+    } else {
+      OS << '(';
+      printMCExpr(*BE.getRHS(), OS);
+      OS << ')';
+    }
+    return;
+  }
+  }
+
+  llvm_unreachable("Invalid expression kind!");
+}
+
 /// PrintAsmOperand - Print out an operand for an inline asm expression.
 ///
 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -2067,16 +2284,9 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
   }
 }
 
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
-  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
-  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   std::stringstream temp;
-  LineReader *reader = this->getReader(filename.str());
+  LineReader *reader = this->getReader(filename);
   temp << "\n//";
   temp << filename.str();
   temp << ":";
@@ -2084,7 +2294,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   temp << " ";
   temp << reader->readLine(line);
   temp << "\n";
-  this->OutStreamer.EmitRawText(Twine(temp.str()));
+  this->OutStreamer->EmitRawText(temp.str());
 }
 
 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index c11b579..301c686 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -40,6 +40,7 @@
 // (subclass of MCStreamer).
 
 namespace llvm {
+  class MCOperand;
 
 class LineReader {
 private:
@@ -86,14 +87,22 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     std::vector<unsigned char> buffer; // the buffer
     SmallVector<unsigned, 4> symbolPosInBuffer;
     SmallVector<const Value *, 4> Symbols;
+    // SymbolsBeforeStripping[i] is the original form of Symbols[i] before
+    // stripping pointer casts, i.e.,
+    // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts().
+    //
+    // We need to keep these values because AggBuffer::print decides whether to
+    // emit a "generic()" cast for Symbols[i] depending on the address space of
+    // SymbolsBeforeStripping[i].
+    SmallVector<const Value *, 4> SymbolsBeforeStripping;
     unsigned curpos;
     raw_ostream &O;
     NVPTXAsmPrinter &AP;
     bool EmitGeneric;
 
   public:
-    AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
-        : size(_size), buffer(_size), O(_O), AP(_AP) {
+    AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+        : size(size), buffer(size), O(O), AP(AP) {
       curpos = 0;
       numSymbols = 0;
       EmitGeneric = AP.EmitGeneric;
@@ -119,9 +128,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
       }
      return curpos;
     }
-    void addSymbol(const Value *GVar) {
+    void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) {
       symbolPosInBuffer.push_back(curpos);
       Symbols.push_back(GVar);
+      SymbolsBeforeStripping.push_back(GVarBeforeStripping);
       numSymbols++;
     }
     void print() {
@@ -138,17 +148,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
         unsigned int nSym = 0;
         unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
         unsigned int nBytes = 4;
-        if (AP.nvptxSubtarget.is64Bit())
+        if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
           nBytes = 8;
         for (pos = 0; pos < size; pos += nBytes) {
           if (pos)
             O << ", ";
           if (pos == nextSymbolPos) {
             const Value *v = Symbols[nSym];
+            const Value *v0 = SymbolsBeforeStripping[nSym];
             if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
               MCSymbol *Name = AP.getSymbol(GVar);
-              PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
-              bool IsNonGenericPointer = false;
+              PointerType *PTy = dyn_cast<PointerType>(v0->getType());
+              bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
               if (PTy && PTy->getAddressSpace() != 0) {
                 IsNonGenericPointer = true;
               }
@@ -159,8 +170,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
               } else {
                 O << *Name;
               }
-            } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
-              O << *AP.lowerConstant(Cexpr);
+            } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
+              const MCExpr *Expr =
+                AP.lowerConstantForGV(cast<Constant>(CExpr), false);
+              AP.printMCExpr(*Expr, O);
             } else
               llvm_unreachable("symbol type unknown");
             nSym++;
@@ -187,6 +200,7 @@ private:
   const Function *F;
   std::string CurrentFnName;
 
+  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
   void EmitFunctionEntryLabel() override;
   void EmitFunctionBodyStart() override;
   void EmitFunctionBodyEnd() override;
@@ -211,7 +225,7 @@ private:
   void printParamName(Function::const_arg_iterator I, int paramIndex,
                       raw_ostream &O);
   void emitGlobals(const Module &M);
-  void emitHeader(Module &M, raw_ostream &O);
+  void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
   void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
   void emitVirtualRegister(unsigned int vr, raw_ostream &);
   void emitFunctionExternParamList(const MachineFunction &MF);
@@ -230,6 +244,10 @@ private:
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                              unsigned AsmVariant, const char *ExtraCode,
                              raw_ostream &) override;
+
+  const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
+  void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
+
 protected:
   bool doInitialization(Module &M) override;
   bool doFinalization(Module &M) override;
@@ -247,8 +265,10 @@ private:
   typedef DenseMap<unsigned, unsigned> VRegMap;
   typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
   VRegRCMap VRegMapping;
-  // cache the subtarget here.
-  const NVPTXSubtarget &nvptxSubtarget;
+
+  // Cache the subtarget here.
+  const NVPTXSubtarget *nvptxSubtarget;
+
   // Build the map between type name and ID based on module's type
   // symbol table.
   std::map<const Type *, std::string> TypeNameMap;
@@ -281,6 +301,8 @@ private:
                                MCOperand &MCOp);
   void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
 
+  bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
+
   LineReader *reader;
 
   LineReader *getReader(std::string);
@@ -298,12 +320,12 @@ private:
   bool EmitGeneric;
 
 public:
-  NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-      : AsmPrinter(TM, Streamer),
-        nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+  NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)),
+        EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
+                    NVPTX::CUDA) {
     CurrentBankselLabelInBasicBlock = "";
     reader = nullptr;
-    EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
   }
 
   ~NVPTXAsmPrinter() {
@@ -311,6 +333,15 @@ public:
     delete reader;
   }
 
+  bool runOnMachineFunction(MachineFunction &F) override {
+    nvptxSubtarget = &F.getSubtarget<NVPTXSubtarget>();
+    return AsmPrinter::runOnMachineFunction(F);
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineLoopInfo>();
+    AsmPrinter::getAnalysisUsage(AU);
+  }
+
   bool ignoreLoc(const MachineInstr &);
 
   std::string getVirtualRegisterName(unsigned) const;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 962b123..7d4be8e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -19,8 +19,8 @@
 #include "NVPTX.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
-#include "llvm/PassManager.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..ae63cae 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,19 +123,17 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
     // =>
     //   %0 = gep X, indices
     //   %1 = addrspacecast %0
-    GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
-                                                           Indices,
-                                                           GEP->getName(),
-                                                           GEPI);
+    GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+        GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+        GEP->getName(), GEPI);
     NewGEPI->setIsInBounds(GEP->isInBounds());
     GEP->replaceAllUsesWith(
         new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
   } else {
     // GEP is a constant expression.
     Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
-        cast<Constant>(Cast->getOperand(0)),
-        Indices,
-        GEP->isInBounds());
+        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
+        Indices, GEP->isInBounds());
     GEP->replaceAllUsesWith(
         ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
   }
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 314df38..5503494 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,15 +26,15 @@
 
 using namespace llvm;
 
-NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
-      is64bit(STI.is64Bit()) {}
+NVPTXFrameLowering::NVPTXFrameLowering()
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0) {}
 
 bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
 
-void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
+                                      MachineBasicBlock &MBB) const {
   if (MF.getFrameInfo()->hasStackObjects()) {
-    MachineBasicBlock &MBB = MF.front();
+    assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
     // Insert "mov.u32 %SP, %Depot"
     MachineBasicBlock::iterator MBBI = MBB.begin();
     // This instruction really occurs before first instruction
@@ -45,7 +45,7 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
 
     // mov %SPL, %depot;
     // cvta.local %SP, %SPL;
-    if (is64bit) {
+    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
       unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
       MachineInstr *MI =
           BuildMI(MBB, MBBI, dl,
                   MF.getSubtarget().getInstrInfo()->get(
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
index 0846b78..14f8bb7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -19,18 +19,16 @@ namespace llvm {
 class NVPTXSubtarget;
 
 class NVPTXFrameLowering : public TargetFrameLowering {
-  bool is64bit;
-
 public:
-  explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
+  explicit NVPTXFrameLowering();
 
   bool hasFP(const MachineFunction &MF) const override;
-  void emitPrologue(MachineFunction &MF) const override;
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I) const override;
+  void
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
 };
 
 } // End llvm namespace
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 5f06d9a..6fd09c4 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -22,10 +22,10 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/ValueMap.h"
-#include "llvm/PassManager.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
 using namespace llvm;
@@ -343,9 +343,11 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F,
ConstantExpr *C, // GetElementPtrConstantExpr return cast<GEPOperator>(C)->isInBounds() ? Builder.CreateGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)); case Instruction::Select: diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index cd0422d..fa38a68 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -50,11 +50,15 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), - Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { + : SelectionDAGISel(tm, OptLevel), TM(tm) { doMulWide = (OptLevel > 0); } +bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget()); + return SelectionDAGISel::runOnMachineFunction(MF); +} + int NVPTXDAGToDAGISel::getDivF32Level() const { if (UsePrecDivF32.getNumOccurrences() > 0) { // If nvptx-prec-div32=N is used on the command-line, always honor it @@ -74,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const { return UsePrecSqrtF32; } else { // Otherwise, use sqrt.approx if fast math is enabled - if (TM.Options.UnsafeFPMath) - return false; - else - return true; + return !TM.Options.UnsafeFPMath; } } @@ -89,16 +90,14 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const { const Function *F = MF->getFunction(); // Otherwise, check for an nvptx-f32ftz attribute on the function if (F->hasFnAttribute("nvptx-f32ftz")) - return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex, - "nvptx-f32ftz") - .getValueAsString() == "true"); + return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; else return false; } } bool NVPTXDAGToDAGISel::allowFMA() const { - const NVPTXTargetLowering *TL = Subtarget.getTargetLowering(); + const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); return TL->allowFMA(*MF, OptLevel); } @@ -525,8 +524,7 @@ SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) { } } -static unsigned int getCodeAddrSpace(MemSDNode *N, - const NVPTXSubtarget &Subtarget) { +static unsigned int getCodeAddrSpace(MemSDNode *N) { const Value *Src = N->getMemOperand()->getValue(); if (!Src) @@ -579,20 +577,16 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { switch (SrcAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); case ADDRESS_SPACE_GLOBAL: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64 - : NVPTX::cvta_global_yes; + Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes; break; case ADDRESS_SPACE_SHARED: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64 - : NVPTX::cvta_shared_yes; + Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes; break; case ADDRESS_SPACE_CONST: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64 - : NVPTX::cvta_const_yes; + Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes; break; case ADDRESS_SPACE_LOCAL: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64 - : NVPTX::cvta_local_yes; + Opc = TM.is64Bit() ? 
NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes; break; } return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); @@ -604,20 +598,20 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { switch (DstAddrSpace) { default: report_fatal_error("Bad address space in addrspacecast"); case ADDRESS_SPACE_GLOBAL: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64 - : NVPTX::cvta_to_global_yes; + Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64 + : NVPTX::cvta_to_global_yes; break; case ADDRESS_SPACE_SHARED: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64 - : NVPTX::cvta_to_shared_yes; + Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64 + : NVPTX::cvta_to_shared_yes; break; case ADDRESS_SPACE_CONST: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64 - : NVPTX::cvta_to_const_yes; + Opc = + TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes; break; case ADDRESS_SPACE_LOCAL: - Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64 - : NVPTX::cvta_to_local_yes; + Opc = + TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes; break; } return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src); @@ -638,7 +632,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return nullptr; // Address Space Setting - unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); + unsigned int codeAddrSpace = getCodeAddrSpace(LD); // Volatile Setting // - .volatile is only availalble for .global and .shared @@ -709,13 +703,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { default: return nullptr; } - SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(fromType), - getI32Imm(fromTypeWidth), Addr, Chain }; + SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), + getI32Imm(vecType, dl), getI32Imm(fromType, dl), + getI32Imm(fromTypeWidth, dl), Addr, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) - : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) + : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_asi; @@ -738,14 +731,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { default: return nullptr; } - SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(fromType), - getI32Imm(fromTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), + getI32Imm(vecType, dl), getI32Imm(fromType, dl), + getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRri64(N1.getNode(), N1, Base, Offset) - : SelectADDRri(N1.getNode(), N1, Base, Offset)) { - if (Subtarget.is64Bit()) { + } else if (TM.is64Bit() ? 
SelectADDRri64(N1.getNode(), N1, Base, Offset) + : SelectADDRri(N1.getNode(), N1, Base, Offset)) { + if (TM.is64Bit()) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; @@ -792,12 +784,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return nullptr; } } - SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(fromType), - getI32Imm(fromTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), + getI32Imm(vecType, dl), getI32Imm(fromType, dl), + getI32Imm(fromTypeWidth, dl), Base, Offset, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } else { - if (Subtarget.is64Bit()) { + if (TM.is64Bit()) { switch (TargetVT) { case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; @@ -844,9 +836,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { return nullptr; } } - SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(fromType), - getI32Imm(fromTypeWidth), N1, Chain }; + SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl), + getI32Imm(vecType, dl), getI32Imm(fromType, dl), + getI32Imm(fromTypeWidth, dl), N1, Chain }; NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } @@ -874,7 +866,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { return nullptr; // Address Space Setting - unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); // Volatile Setting // - .volatile is only availalble for .global and .shared @@ -970,13 +962,12 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { break; } - SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), - getI32Imm(VecType), getI32Imm(FromType), - getI32Imm(FromTypeWidth), Addr, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) - : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { default: return nullptr; @@ -1024,14 +1015,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { break; } - SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), - getI32Imm(VecType), getI32Imm(FromType), - getI32Imm(FromTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) - : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { - if (Subtarget.is64Bit()) { + } else if (TM.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -1127,13 +1117,13 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { } } - SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), - getI32Imm(VecType), getI32Imm(FromType), - getI32Imm(FromTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else { - if (Subtarget.is64Bit()) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -1229,9 +1219,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { } } - SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), - getI32Imm(VecType), getI32Imm(FromType), - getI32Imm(FromTypeWidth), Op1, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL), + getI32Imm(VecType, DL), getI32Imm(FromType, DL), + getI32Imm(FromTypeWidth, DL), Op1, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } @@ -1425,10 +1415,9 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { SDValue Ops[] = { Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) - : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { - if (Subtarget.is64Bit()) { + } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -1710,7 +1699,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else { - if (Subtarget.is64Bit()) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -2013,7 +2002,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return nullptr; // Address Space Setting - unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget); + unsigned int codeAddrSpace = getCodeAddrSpace(ST); // Volatile Setting // - .volatile is only availalble for .global and .shared @@ -2079,13 +2068,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { default: return nullptr; } - SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(toType), - getI32Imm(toTypeWidth), Addr, Chain }; + SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), + getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), + getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr, + Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) - : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (TM.is64Bit() ? 
SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_asi; @@ -2108,14 +2097,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { default: return nullptr; } - SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(toType), - getI32Imm(toTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), + getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), + getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, + Offset, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); - } else if (Subtarget.is64Bit() - ? SelectADDRri64(N2.getNode(), N2, Base, Offset) - : SelectADDRri(N2.getNode(), N2, Base, Offset)) { - if (Subtarget.is64Bit()) { + } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (TM.is64Bit()) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; @@ -2162,12 +2151,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return nullptr; } } - SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(toType), - getI32Imm(toTypeWidth), Base, Offset, Chain }; + SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), + getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), + getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base, + Offset, Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } else { - if (Subtarget.is64Bit()) { + if (TM.is64Bit()) { switch (SourceVT) { case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; @@ -2214,9 +2204,10 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { return nullptr; } } - SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), - getI32Imm(vecType), getI32Imm(toType), - getI32Imm(toTypeWidth), N2, Chain }; + SDValue Ops[] = { N1, getI32Imm(isVolatile, dl), + getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl), + getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2, + Chain }; NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } @@ -2241,7 +2232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { EVT StoreVT = MemSD->getMemoryVT(); // Address Space Setting - unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); + unsigned CodeAddrSpace = getCodeAddrSpace(MemSD); if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) { report_fatal_error("Cannot store to pointer that points to constant " @@ -2290,11 +2281,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { return nullptr; } - StOps.push_back(getI32Imm(IsVolatile)); - StOps.push_back(getI32Imm(CodeAddrSpace)); - StOps.push_back(getI32Imm(VecType)); - StOps.push_back(getI32Imm(ToType)); - StOps.push_back(getI32Imm(ToTypeWidth)); + StOps.push_back(getI32Imm(IsVolatile, DL)); + StOps.push_back(getI32Imm(CodeAddrSpace, DL)); + StOps.push_back(getI32Imm(VecType, DL)); + StOps.push_back(getI32Imm(ToType, DL)); + StOps.push_back(getI32Imm(ToTypeWidth, DL)); if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { @@ -2344,9 +2335,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { break; } StOps.push_back(Addr); - } else if (Subtarget.is64Bit() - ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) - : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (TM.is64Bit() ? 
SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { default: return nullptr; @@ -2395,10 +2385,9 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } StOps.push_back(Base); StOps.push_back(Offset); - } else if (Subtarget.is64Bit() - ? SelectADDRri64(N2.getNode(), N2, Base, Offset) - : SelectADDRri(N2.getNode(), N2, Base, Offset)) { - if (Subtarget.is64Bit()) { + } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -2496,7 +2485,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(Base); StOps.push_back(Offset); } else { - if (Subtarget.is64Bit()) { + if (TM.is64Bit()) { switch (N->getOpcode()) { default: return nullptr; @@ -2725,13 +2714,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue(); SmallVector<SDValue, 2> Ops; - Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); Ops.push_back(Flag); - SDNode *Ret = - CurDAG->getMachineNode(Opc, DL, VTs, Ops); - return Ret; + return CurDAG->getMachineNode(Opc, DL, VTs, Ops); } SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { @@ -2761,7 +2748,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { SmallVector<SDValue, 6> Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(N->getOperand(i + 2)); - Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); // Determine target opcode @@ -2889,8 +2876,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(N->getOperand(i + 3)); - Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32)); - Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32)); + Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32)); Ops.push_back(Chain); Ops.push_back(Flag); @@ -2985,7 +2972,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { // the selected StoreParam node. 
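// [Editor's note] The mechanical change threaded through all of these
// load/store selection hunks is that SelectionDAG's constant factories
// (getConstant, getTargetConstant, getIntPtrConstant) now require an SDLoc,
// so the local getI32Imm helper gained a debug-location parameter. A minimal
// sketch of the updated helper, assuming a SelectionDAG reference is at
// hand; getI32ImmSketch is illustrative, the in-tree helper is a class
// member of NVPTXDAGToDAGISel.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue getI32ImmSketch(SelectionDAG &DAG, unsigned Imm, SDLoc DL) {
  // Pre-merge this was DAG.getTargetConstant(Imm, MVT::i32); the constant
  // node now inherits the location of the node being selected.
  return DAG.getTargetConstant(Imm, DL, MVT::i32);
}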
case NVPTXISD::StoreParamU32: { Opcode = NVPTX::StoreParamI32; - SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL, MVT::i32, Ops[0], CvtNone); @@ -2994,7 +2981,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { } case NVPTXISD::StoreParamS32: { Opcode = NVPTX::StoreParamI32; - SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32); SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL, MVT::i32, Ops[0], CvtNone); @@ -4742,6 +4729,7 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { /// SelectBFE - Look for instruction sequences that can be made more efficient /// by using the 'bfe' (bit-field extract) PTX instruction SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { + SDLoc DL(N); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Len; @@ -4772,8 +4760,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { } // How many bits are in our mask? - uint64_t NumBits = CountTrailingOnes_64(MaskVal); - Len = CurDAG->getTargetConstant(NumBits, MVT::i32); + uint64_t NumBits = countTrailingOnes(MaskVal); + Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { // We have a 'srl/and' pair, extract the effective start bit and length @@ -4791,7 +4779,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { // emitting the srl/and pair. return NULL; } - Start = CurDAG->getTargetConstant(StartVal, MVT::i32); + Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32); } else { // Do not handle the case where the shift amount (can be zero if no srl // was found) is not constant. 
We could handle this case, but it would @@ -4836,10 +4824,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { NumZeros = 0; // The number of bits in the result bitfield will be the number of // trailing ones (the AND) minus the number of bits we shift off - NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt; + NumBits = countTrailingOnes(MaskVal) - ShiftAmt; } else if (isShiftedMask_64(MaskVal)) { NumZeros = countTrailingZeros(MaskVal); - unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros); + unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros); // The number of bits in the result bitfield will be the number of // trailing zeros plus the number of set bits in the mask minus the // number of bits we shift off @@ -4856,8 +4844,8 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { } Val = AndLHS; - Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32); - Len = CurDAG->getTargetConstant(NumBits, MVT::i32); + Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); + Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); } else if (LHS->getOpcode() == ISD::SHL) { // Here, we have a pattern like: // @@ -4897,10 +4885,10 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { } Start = - CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32); + CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32); Len = CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() - - OuterShiftAmt, MVT::i32); + OuterShiftAmt, DL, MVT::i32); if (N->getOpcode() == ISD::SRA) { // If we have a arithmetic right shift, we need to use the signed bfe @@ -4941,10 +4929,7 @@ SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { Val, Start, Len }; - SDNode *Ret = - CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); - - return Ret; + return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops); } // SelectDirectAddr - Match a direct address for DAG. @@ -4976,7 +4961,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi_imp( if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), + mvt); return true; } } @@ -5001,7 +4987,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp( SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); - Offset = CurDAG->getTargetConstant(0, mvt); + Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt); return true; } if (Addr.getOpcode() == ISD::TargetExternalSymbol || @@ -5019,7 +5005,8 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp( Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else Base = Addr.getOperand(0); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode), + mvt); return true; } } @@ -5056,15 +5043,15 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { + const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; - switch (ConstraintCode) { + switch (ConstraintID) { default: return true; - case 'm': // memory + case InlineAsm::Constraint_m: // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); - OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; } if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 69afcd7..fe20580 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -26,6 +26,7 @@ using namespace llvm; namespace { class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { + const NVPTXTargetMachine &TM; // If true, generate mul.wide from sext and mul bool doMulWide; @@ -43,11 +44,11 @@ public: const char *getPassName() const override { return "NVPTX DAG->DAG Pattern Instruction Selection"; } - - const NVPTXSubtarget &Subtarget; + bool runOnMachineFunction(MachineFunction &MF) override; + const NVPTXSubtarget *Subtarget; bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, + unsigned ConstraintID, std::vector<SDValue> &OutOps) override; private: // Include the pieces autogenerated from the target description. @@ -70,8 +71,8 @@ private: SDNode *SelectSurfaceIntrinsic(SDNode *N); SDNode *SelectBFE(SDNode *N); - inline SDValue getI32Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + inline SDValue getI32Imm(unsigned Imm, SDLoc DL) { + return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } // Match direct address complex pattern. diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 093ba1a..805847a 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -106,9 +106,9 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, } // NVPTXTargetLowering Constructor. 
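// [Editor's note] The SelectInlineAsmMemoryOperand hunk just above, together
// with the NVPTXISelDAGToDAG.h change, moves inline-asm memory operands from
// raw constraint letters to target-independent InlineAsm::Constraint_* IDs;
// the getInlineAsmMemConstraint hook added to NVPTXTargetLowering later in
// this diff maps "m" to that ID. A minimal sketch of the new test, assuming
// headers from this revision; isMemoryConstraint is illustrative only.
#include "llvm/IR/InlineAsm.h"

static bool isMemoryConstraint(unsigned ConstraintID) {
  // was: switch (ConstraintCode) { case 'm': ... }
  return ConstraintID == llvm::InlineAsm::Constraint_m;
}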
-NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) - : TargetLowering(TM), nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI) + : TargetLowering(TM), nvTM(&TM), STI(STI) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather @@ -167,14 +167,14 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); - if (nvptxSubtarget.hasROT64()) { + if (STI.hasROT64()) { setOperationAction(ISD::ROTL, MVT::i64, Legal); setOperationAction(ISD::ROTR, MVT::i64, Legal); } else { setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::ROTR, MVT::i64, Expand); } - if (nvptxSubtarget.hasROT32()) { + if (STI.hasROT32()) { setOperationAction(ISD::ROTL, MVT::i32, Legal); setOperationAction(ISD::ROTR, MVT::i32, Legal); } else { @@ -259,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Legal); setOperationAction(ISD::CTPOP, MVT::i64, Legal); + // PTX does not directly support SELP of i1, so promote to i32 first + setOperationAction(ISD::SELECT, MVT::i1, Custom); + // We have some custom DAG combine patterns for these nodes setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::AND); @@ -268,17 +271,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM) // Now deduce the information based on the above mentioned // actions - computeRegisterProperties(); + computeRegisterProperties(STI.getRegisterInfo()); } const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - return nullptr; + switch ((NVPTXISD::NodeType)Opcode) { + case NVPTXISD::FIRST_NUMBER: + break; case NVPTXISD::CALL: return "NVPTXISD::CALL"; case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; + case NVPTXISD::LOAD_PARAM: + return "NVPTXISD::LOAD_PARAM"; case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; case NVPTXISD::DeclareParam: @@ -287,10 +292,14 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::DeclareScalarParam"; case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareScalarRet: + return "NVPTXISD::DeclareScalarRet"; case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; + case NVPTXISD::PrintCallUni: + return "NVPTXISD::PrintCallUni"; case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; case NVPTXISD::LoadParamV2: @@ -363,6 +372,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::FUN_SHFR_CLAMP"; case NVPTXISD::IMAD: return "NVPTXISD::IMAD"; + case NVPTXISD::Dummy: + return "NVPTXISD::Dummy"; case NVPTXISD::MUL_WIDE_SIGNED: return "NVPTXISD::MUL_WIDE_SIGNED"; case NVPTXISD::MUL_WIDE_UNSIGNED: @@ -852,6 +863,7 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; } + return nullptr; } TargetLoweringBase::LegalizeTypeAction @@ -876,7 +888,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, unsigned retAlignment, const ImmutableCallSite *CS) const { - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = 
(STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return ""; @@ -927,7 +939,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, } first = false; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); @@ -1041,7 +1053,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Type *retTy = CLI.RetTy; ImmutableCallSite *CS = CLI.CS; - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; @@ -1050,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const Function *F = MF.getFunction(); SDValue tempChain = Chain; - Chain = - DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getIntPtrConstant(uniqueCallSite, dl, true), + dl); SDValue InFlag = Chain.getValue(1); unsigned paramCount = 0; @@ -1072,7 +1084,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT VT = Outs[OIdx].VT; Type *Ty = Args[i].Ty; - if (Outs[OIdx].Flags.isByVal() == false) { + if (!Outs[OIdx].Flags.isByVal()) { if (Ty->isAggregateType()) { // aggregate SmallVector<EVT, 16> vtparts; @@ -1083,9 +1095,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = TD->getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), InFlag }; + SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, + MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), + InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); @@ -1100,8 +1114,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(Offsets[j], MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(Offsets[j], dl, MVT::i32), StVal, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, @@ -1121,9 +1135,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = TD->getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), InFlag }; + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(align, dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), + InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); @@ -1144,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, 
- DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), Elt, + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), Elt, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, @@ -1161,9 +1177,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), Elt0, Elt1, - InFlag }; + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), Elt0, + Elt1, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, CopyParamVTs, CopyParamOps, MemVT, MachinePointerInfo()); @@ -1193,8 +1209,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue StoreVal; SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); - Ops.push_back(DAG.getConstant(paramCount, MVT::i32)); - Ops.push_back(DAG.getConstant(curOffset, MVT::i32)); + Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); + Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32)); unsigned Opc = NVPTXISD::StoreParamV2; @@ -1261,9 +1277,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); @@ -1276,8 +1292,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OutV = DAG.getNode(opc, dl, MVT::i16, OutV); } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutV, InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), OutV, + InFlag }; unsigned opcode = NVPTXISD::StoreParam; if (Outs[OIdx].Flags.isZExt()) @@ -1306,9 +1324,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // so we don't need to worry about natural alignment or not. // See TargetLowering::LowerCallTo(). 
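// [Editor's note] Throughout LowerCall the lowering now consults the
// NVPTXSubtarget reference (STI) handed to its constructor rather than
// TM.getSubtarget<NVPTXSubtarget>(), matching the per-function subtarget
// pattern adopted by the DAG selector and AsmPrinter earlier in this diff.
// A minimal sketch of one such query, assuming this revision's NVPTX target
// headers are on the include path; isABISketch is illustrative only.
#include "NVPTXSubtarget.h"

static bool isABISketch(const llvm::NVPTXSubtarget &STI) {
  // was: nvptxSubtarget.getSmVersion() >= 20 via the TargetMachine
  return STI.getSmVersion() >= 20; // non-ABI (sm_1x) compilation unsupported
}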
SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - InFlag + Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); @@ -1319,7 +1337,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], - DAG.getConstant(curOffset, getPointerTy())); + DAG.getConstant(curOffset, dl, getPointerTy())); SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), false, false, false, PartAlign); @@ -1327,9 +1345,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); } SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(curOffset, MVT::i32), theVal, - InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(curOffset, dl, MVT::i32), + theVal, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, elemtype, MachinePointerInfo()); @@ -1361,9 +1380,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (resultsz < 32) resultsz = 32; SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(resultsz, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(resultsz, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, DeclareRetOps); InFlag = Chain.getValue(1); @@ -1371,9 +1390,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, MVT::i32), - DAG.getConstant(resultsz / 8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + DAG.getConstant(retAlignment, dl, MVT::i32), + DAG.getConstant(resultsz / 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps); InFlag = Chain.getValue(1); @@ -1401,7 +1420,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue PrintCallOps[] = { - Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag + Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag }; Chain = DAG.getNode(Func ? 
(NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), dl, PrintCallVTs, PrintCallOps); @@ -1427,20 +1446,22 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else opcode = NVPTXISD::CallArg; SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(i, MVT::i32), InFlag }; + SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(i, dl, MVT::i32), InFlag }; Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), + SDValue CallArgEndOps[] = { Chain, + DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), + SDValue PrototypeOps[] = { Chain, + DAG.getConstant(uniqueCallSite, dl, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); InFlag = Chain.getValue(1); @@ -1452,11 +1473,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); - assert(nvTM->getSubtargetImpl()->getTargetLowering()->getNumRegisters( - F->getContext(), ObjectVT) == NumElts && + assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), + ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); - bool needTruncate = sz < 8 ? 
true : false; + bool needTruncate = sz < 8; if (NumElts == 1) { // Just a simple load @@ -1471,11 +1492,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetVTs.push_back(EltVT); LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1501,11 +1519,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParamV2, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1547,11 +1562,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(Ofst, dl, MVT::i32), InFlag}; SDValue retval = DAG.getMemIntrinsicNode( Opc, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); @@ -1583,7 +1595,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = VTs[i].getSizeInBits(); unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); - bool needTruncate = sz < 8 ? 
true : false; + bool needTruncate = sz < 8; if (VTs[i].isInteger() && (sz < 8)) sz = 8; @@ -1605,11 +1617,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - SmallVector<SDValue, 4> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); - LoadRetOps.push_back(InFlag); + SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(Offsets[i], dl, MVT::i32), + InFlag}; SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, @@ -1624,8 +1634,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), - DAG.getIntPtrConstant(uniqueCallSite + 1, true), + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(uniqueCallSite, dl, true), + DAG.getIntPtrConstant(uniqueCallSite + 1, dl, + true), InFlag, dl); uniqueCallSite++; @@ -1651,7 +1663,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { unsigned NumSubElem = VVT.getVectorNumElements(); for (unsigned j = 0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, - DAG.getIntPtrConstant(j))); + DAG.getIntPtrConstant(j, dl))); } } return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); @@ -1675,7 +1687,7 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShAmt = Op.getOperand(2); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + if (VTBits == 32 && STI.getSmVersion() >= 35) { // For 32bit and sm35, we can use the funnel shift 'shf' instruction. // {dHi, dLo} = {aHi, aLo} >> Amt @@ -1700,16 +1712,18 @@ SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, // dHi = aHi >> Amt SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant(VTBits, MVT::i32), ShAmt); + DAG.getConstant(VTBits, dl, MVT::i32), + ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, - DAG.getConstant(VTBits, MVT::i32)); + DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, - DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); + DAG.getConstant(VTBits, dl, MVT::i32), + ISD::SETGE); SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); @@ -1735,7 +1749,7 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + if (VTBits == 32 && STI.getSmVersion() >= 35) { // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
// {dHi, dLo} = {aHi, aLo} << Amt @@ -1760,16 +1774,18 @@ SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, // dHi = (aHi << Amt) | (aLo >> (size-Amt)) SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant(VTBits, MVT::i32), ShAmt); + DAG.getConstant(VTBits, dl, MVT::i32), + ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, - DAG.getConstant(VTBits, MVT::i32)); + DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, - DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); + DAG.getConstant(VTBits, dl, MVT::i32), + ISD::SETGE); SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); @@ -1803,11 +1819,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG); + case ISD::SELECT: + return LowerSelect(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } +SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { + SDValue Op0 = Op->getOperand(0); + SDValue Op1 = Op->getOperand(1); + SDValue Op2 = Op->getOperand(2); + SDLoc DL(Op.getNode()); + + assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); + + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); + Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); + SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); + + return Trunc; +} + SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); @@ -1924,16 +1958,14 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // Then the split values for (unsigned i = 0; i < NumElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, - DAG.getIntPtrConstant(i)); + DAG.getIntPtrConstant(i, DL)); if (NeedExt) ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); Ops.push_back(ExtVal); } // Then any remaining arguments - for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) { - Ops.push_back(N->getOperand(i)); - } + Ops.append(N->op_begin() + 2, N->op_end()); SDValue NewSt = DAG.getMemIntrinsicNode( Opcode, DL, DAG.getVTList(MVT::Other), Ops, @@ -2029,13 +2061,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = DAG.getSubtarget().getTargetLowering(); + const TargetLowering *TLI = STI.getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; bool isKernel = llvm::isKernelFunction(*F); - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; @@ -2070,7 +2102,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( (theArgs[i]->getParent() ? 
theArgs[i]->getParent()->getParent() : nullptr))) { assert(isKernel && "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); + InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); continue; } @@ -2109,7 +2141,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // to newly created nodes. The SDNodes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; @@ -2132,7 +2164,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( llvm::ADDRESS_SPACE_PARAM)); SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(offsets[parti], getPointerTy())); + DAG.getConstant(offsets[parti], dl, getPointerTy())); unsigned partAlign = aggregateIsPacked ? 1 : TD->getABITypeAlignment( @@ -2197,9 +2229,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( P.getNode()->setIROrder(idx + 1); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) { Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0); @@ -2232,7 +2264,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( llvm::ADDRESS_SPACE_PARAM)); SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, getPointerTy())); + DAG.getConstant(Ofst, dl, getPointerTy())); SDValue P = DAG.getLoad( VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, false, true, @@ -2244,7 +2276,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (i + j >= NumElts) break; SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P, - DAG.getIntPtrConstant(j)); + DAG.getIntPtrConstant(j, dl)); if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); @@ -2302,7 +2334,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( else { SDValue p2 = DAG.getNode( ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p); InVals.push_back(p2); } } @@ -2333,7 +2365,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Type *RetTy = F->getReturnType(); const DataLayout *TD = getDataLayout(); - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; @@ -2356,7 +2388,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // We only have one element, so just directly store it if (NeedExtend) StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); - SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; + SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, DAG.getVTList(MVT::Other), Ops, EltVT, MachinePointerInfo()); @@ -2371,7 +2403,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, 
MVT::i16, StoreVal1); } - SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, + SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0, StoreVal1 }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, DAG.getVTList(MVT::Other), Ops, @@ -2403,7 +2435,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue StoreVal; SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); - Ops.push_back(DAG.getConstant(Offset, MVT::i32)); + Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32)); unsigned Opc = NVPTXISD::StoreRetvalV2; EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType(); @@ -2468,7 +2500,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (TheValType.isVector()) TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TheValType.getVectorElementType(), TmpVal, - DAG.getIntPtrConstant(j)); + DAG.getIntPtrConstant(j, dl)); EVT TheStoreType = ValVTs[i]; if (RetTy->isIntegerTy() && TD->getTypeAllocSizeInBits(RetTy) < 32) { @@ -2482,7 +2514,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue Ops[] = { Chain, - DAG.getConstant(Offsets[i], MVT::i32), + DAG.getConstant(Offsets[i], dl, MVT::i32), TmpVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, DAG.getVTList(MVT::Other), Ops, @@ -3753,7 +3785,8 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { } std::pair<unsigned, const TargetRegisterClass *> -NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, +NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -3774,7 +3807,7 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, &NVPTX::Float64RegsRegClass); } } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// getFunctionAlignment - Return the Log2 alignment of this function. @@ -3885,7 +3918,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, const SDNode *left = N0.getOperand(0).getNode(); const SDNode *right = N0.getOperand(1).getNode(); - if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) + if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) opIsLive = true; if (!opIsLive) @@ -4103,6 +4136,7 @@ static SDValue TryMULWIDECombine(SDNode *N, return SDValue(); } + SDLoc DL(N); unsigned OptSize = MulType.getSizeInBits() >> 1; SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -4125,7 +4159,7 @@ static SDValue TryMULWIDECombine(SDNode *N, unsigned BitWidth = MulType.getSizeInBits(); if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; - RHS = DCI.DAG.getConstant(MulVal, MulType); + RHS = DCI.DAG.getConstant(MulVal, DL, MulType); } else { return SDValue(); } @@ -4147,9 +4181,9 @@ static SDValue TryMULWIDECombine(SDNode *N, // Truncate the operands to the correct size. Note that these are just for // type consistency and will (likely) be eliminated in later phases. 
SDValue TruncLHS = - DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); + DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); SDValue TruncRHS = - DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); + DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); unsigned Opc; if (Signed) { @@ -4158,7 +4192,7 @@ static SDValue TryMULWIDECombine(SDNode *N, Opc = NVPTXISD::MUL_WIDE_UNSIGNED; } - return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); + return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); } /// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. @@ -4196,7 +4230,7 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: case ISD::FADD: - return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); + return PerformADDCombine(N, DCI, STI, OptLevel); case ISD::MUL: return PerformMULCombine(N, DCI, OptLevel); case ISD::SHL: @@ -4281,15 +4315,12 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, } } - SmallVector<SDValue, 8> OtherOps; - // Copy regular operands - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OtherOps.push_back(N->getOperand(i)); + SmallVector<SDValue, 8> OtherOps(N->op_begin(), N->op_end()); // The select routine does not have access to the LoadSDNode instance, so // pass along the extension information - OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); + OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, LD->getMemoryVT(), @@ -4398,8 +4429,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, OtherOps.push_back(Chain); // Chain // Skip operand 1 (intrinsic ID) // Others - for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) - OtherOps.push_back(N->getOperand(i)); + OtherOps.append(N->op_begin() + 2, N->op_end()); MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); @@ -4430,9 +4460,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, "Custom handling of non-i8 ldu/ldg?"); // Just copy all operands as-is - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); + SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); // Force output to i16 SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); @@ -4490,10 +4518,9 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { delete DwarfLocSection; delete DwarfARangesSection; delete DwarfRangesSection; - delete DwarfMacroInfoSection; } -const MCSection * +MCSection * NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index b3fea3f..5142ae3 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -21,7 +21,7 @@ namespace llvm { namespace NVPTXISD { -enum NodeType { +enum NodeType : unsigned { // Start the numbering from where ISD NodeType finishes. 
FIRST_NUMBER = ISD::BUILTIN_OP_END, Wrapper, @@ -436,7 +436,8 @@ class NVPTXSubtarget; //===--------------------------------------------------------------------===// class NVPTXTargetLowering : public TargetLowering { public: - explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM); + explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI); SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -469,7 +470,8 @@ public: ConstraintType getConstraintType(const std::string &Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; SDValue LowerFormalArguments( @@ -495,6 +497,12 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; + unsigned getInlineAsmMemConstraint( + const std::string &ConstraintCode) const override { + // FIXME: Map different constraints differently. + return InlineAsm::Constraint_m; + } + const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts @@ -510,7 +518,7 @@ public: bool enableAggressiveFMAFusion(EVT VT) const override { return true; } private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &STI; // cache the subtarget here SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = MVT::i32) const; @@ -529,6 +537,8 @@ private: SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 740ca03..dabc3be 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -28,9 +28,7 @@ using namespace llvm; // Pin the vtable to this file. void NVPTXInstrInfo::anchor() {} -// FIXME: Add the subtarget support on this constructor. 
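Editorial note: the getRegForInlineAsmConstraint override declared above dispatches single-letter inline asm constraints to NVPTX register classes. A reduced sketch of that dispatch; only the Float64Regs tail of the switch is visible in this diff, so treat the full letter-to-class mapping below as an assumption based on the usual NVPTX constraint set:

    // Hypothetical stand-in enum; the real code returns TargetRegisterClass*.
    enum NVPTXRegClassKind { Pred1, Int16, Int32, Int64, Fp32, Fp64, None };

    static NVPTXRegClassKind classFor(char Constraint) {
      switch (Constraint) {
      case 'b': return Pred1;  // 1-bit predicate registers (assumed)
      case 'h': return Int16;  // 16-bit integer registers (assumed)
      case 'r': return Int32;  // 32-bit integer registers (assumed)
      case 'l': return Int64;  // 64-bit integer registers (assumed)
      case 'f': return Fp32;   // float registers (assumed)
      case 'd': return Fp64;   // Float64Regs, per the hunk above
      default:  return None;   // defer to TargetLowering's generic handling
      }
    }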
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI) - : NVPTXGenInstrInfo(), RegInfo(STI) {} +NVPTXInstrInfo::NVPTXInstrInfo() : NVPTXGenInstrInfo(), RegInfo() {} void NVPTXInstrInfo::copyPhysReg( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h index 6de7536..9b5d491 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -27,7 +27,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { const NVPTXRegisterInfo RegInfo; virtual void anchor(); public: - explicit NVPTXInstrInfo(NVPTXSubtarget &STI); + explicit NVPTXInstrInfo(); const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 2c571c4..6fdd60f 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -117,24 +117,24 @@ def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{ //===----------------------------------------------------------------------===// -def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">; -def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">; -def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">; +def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">; +def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">; +def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">; def useAtomRedG32forGen32 : - Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">; -def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">; -def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">; -def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">; -def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">; + Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">; +def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">; +def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">; +def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">; +def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">; def useAtomRedG64forGen64 : - Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">; -def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">; -def hasVote : Predicate<"Subtarget.hasVote()">; -def hasDouble : Predicate<"Subtarget.hasDouble()">; -def reqPTX20 : Predicate<"Subtarget.reqPTX20()">; -def hasLDG : Predicate<"Subtarget.hasLDG()">; -def hasLDU : Predicate<"Subtarget.hasLDU()">; -def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">; + Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">; +def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">; +def hasVote : Predicate<"Subtarget->hasVote()">; +def hasDouble : Predicate<"Subtarget->hasDouble()">; +def reqPTX20 : Predicate<"Subtarget->reqPTX20()">; +def hasLDG : Predicate<"Subtarget->hasLDG()">; +def hasLDU : Predicate<"Subtarget->hasLDU()">; +def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">; def doF32FTZ : Predicate<"useF32FTZ()">; def doNoF32FTZ : Predicate<"!useF32FTZ()">; @@ -150,12 +150,12 @@ def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; -def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; -def noHWROT32 : 
Predicate<"!Subtarget.hasHWROT32()">; +def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; +def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; def true : Predicate<"1">; -def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">; +def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; //===----------------------------------------------------------------------===// @@ -452,13 +452,13 @@ def Int4Const : PatLeaf<(imm), [{ def SHL2MUL32 : SDNodeXForm<imm, [{ const APInt &v = N->getAPIntValue(); APInt temp(32, 1); - return CurDAG->getTargetConstant(temp.shl(v), MVT::i32); + return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32); }]>; def SHL2MUL16 : SDNodeXForm<imm, [{ const APInt &v = N->getAPIntValue(); APInt temp(16, 1); - return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); + return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16); }]>; def MULWIDES64 @@ -1138,7 +1138,7 @@ def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), []>; def SUB_FRM_32 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32); + return CurDAG->getTargetConstant(32-N->getZExtValue(), SDLoc(N), MVT::i32); }]>; def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), @@ -1189,7 +1189,7 @@ def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, []>; def SUB_FRM_64 : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32); + return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); }]>; def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), @@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; -// Special select for predicate operands -def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), - (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), - (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; - // // Funnnel shift in clamp mode // @@ -1659,12 +1654,12 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> { (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; } -defm FSetGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>; -defm FSetLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>; -defm FSetGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>; -defm FSetLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>; -defm FSetEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>; -defm FSetNE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>; +defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>; +defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>; +defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>; +defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>; +defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>; +defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>; defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>; defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>; @@ -1673,6 +1668,13 @@ defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>; defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>; defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>; +defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>; +defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>; +defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>; +defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>; +defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>; +defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>; + defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>; defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>; diff --git 
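Editorial note: the SUB_FRM_32/SUB_FRM_64 transforms above compute width minus the immediate because a left rotate by n equals a right rotate by width minus n. A plain C++ statement of the identity the rotate patterns rely on (illustrative, with masking to keep the shifts defined at the boundaries):

    #include <cstdint>

    // rotl(x, n) == rotr(x, 32 - n) for 0 < n < 32.
    static uint32_t rotl32(uint32_t X, unsigned N) {
      N &= 31;
      return (X << N) | (X >> ((32 - N) & 31));
    }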
a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f0c3663..6ab0fad 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -22,10 +24,33 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "nvptx" using namespace llvm; -namespace llvm { FunctionPass *createLowerAggrCopies(); } +namespace { +// actual analysis class, which is a functionpass +struct NVPTXLowerAggrCopies : public FunctionPass { + static char ID; + + NVPTXLowerAggrCopies() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<MachineFunctionAnalysis>(); + AU.addPreserved<StackProtector>(); + } + + bool runOnFunction(Function &F) override; + + static const unsigned MaxAggrCopySize = 128; + + const char *getPassName() const override { + return "Lower aggregate copies/intrinsics into loops"; + } +}; +} // namespace char NVPTXLowerAggrCopies::ID = 0; @@ -45,8 +70,8 @@ static void convertTransferToLoop( // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -59,9 +84,11 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind - Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile); + Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), + srcVolatile); // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile); + loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), + dstVolatile); // The value for ind coming from backedge is (ind + 1) Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); @@ -81,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored dstAddr = @@ -91,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, PHINode *ind = loop.CreatePHI(len->getType(), 0); ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false); + loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); ind->addIncoming(newind, loopBB); @@ -104,7 +131,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { 
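Editorial note as a reading aid for convertTransferToLoop above: the pass replaces a large aggregate load/store pair with an explicit byte-copy loop, where the IR PHI plays the role of the induction variable. A hand-written C-style analogue of the emitted loop (illustrative, not the generated IR):

    // ind = phi [0, origBB], [ind + 1, loopBB]; loop until ind == len.
    static void copyBytes(char *Dst, const char *Src, unsigned Len) {
      for (unsigned Ind = 0; Ind != Len; ++Ind)
        Dst[Ind] = Src[Ind]; // load from srcAddr+ind, store at dstAddr+ind
    }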
SmallVector<MemTransferInst *, 4> aggrMemcpys; SmallVector<MemSetInst *, 4> aggrMemsets; - const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); // @@ -117,10 +144,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) + if (!load->hasOneUse()) continue; - if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; User *use = load->user_back(); @@ -166,7 +193,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL->getTypeStoreSize(load->getType()); + unsigned numLoads = DL.getTypeStoreSize(load->getType()); Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index da301d5..3c39f53 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -15,35 +15,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H #define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Pass.h" - namespace llvm { +class FunctionPass; -// actual analysis class, which is a functionpass -struct NVPTXLowerAggrCopies : public FunctionPass { - static char ID; - - NVPTXLowerAggrCopies() : FunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<DataLayoutPass>(); - AU.addPreserved<MachineFunctionAnalysis>(); - AU.addPreserved<StackProtector>(); - } - - bool runOnFunction(Function &F) override; - - static const unsigned MaxAggrCopySize = 128; - - const char *getPassName() const override { - return "Lower aggregate copies/intrinsics into loops"; - } -}; - -extern FunctionPass *createLowerAggrCopies(); +FunctionPass *createLowerAggrCopies(); } #endif diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp index 3149399..68dfbb7 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp @@ -35,7 +35,8 @@ namespace llvm { void initializeNVPTXLowerStructArgsPass(PassRegistry &); } -class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass { +namespace { +class NVPTXLowerStructArgs : public FunctionPass { bool runOnFunction(Function &F) override; void handleStructPtrArgs(Function &); @@ -48,6 +49,7 @@ public: return "Copy structure (byval *) arguments to stack"; } }; +} // namespace char NVPTXLowerStructArgs::ID = 1; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp index 137248b..779b65e 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -45,3 +45,13 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const { OS << std::string(NumHex - HexStr.length(), '0'); OS << utohexstr(API.getZExtValue()); } + +const NVPTXGenericMCSymbolRefExpr* 
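Editorial note: the DataLayout change above reflects the LLVM 3.7 move of the layout onto the Module, so passes no longer require a DataLayoutPass and instead ask the parent module directly. A minimal sketch of the new access pattern (the helper is hypothetical):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static uint64_t storeSizeOf(const Function &F, Type *Ty) {
      const DataLayout &DL = F.getParent()->getDataLayout();
      return DL.getTypeStoreSize(Ty); // no DataLayoutPass needed
    }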
+NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr, + MCContext &Ctx) { + return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr); +} + +void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { + OS << "generic(" << *SymExpr << ")"; +} diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h index d39a394..8c6b219 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h @@ -29,8 +29,8 @@ private: const VariantKind Kind; const APFloat Flt; - explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt) - : Kind(_Kind), Flt(_Flt) {} + explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt) + : Kind(Kind), Flt(Flt) {} public: /// @name Construction @@ -68,9 +68,7 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - const MCSection *FindAssociatedSection() const override { - return nullptr; - } + MCSection *FindAssociatedSection() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -79,6 +77,48 @@ public: return E->getKind() == MCExpr::Target; } }; + +/// A wrapper for MCSymbolRefExpr that tells the assembly printer that the +/// symbol should be enclosed by generic(). +class NVPTXGenericMCSymbolRefExpr : public MCTargetExpr { +private: + const MCSymbolRefExpr *SymExpr; + + explicit NVPTXGenericMCSymbolRefExpr(const MCSymbolRefExpr *_SymExpr) + : SymExpr(_SymExpr) {} + +public: + /// @name Construction + /// @{ + + static const NVPTXGenericMCSymbolRefExpr + *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx); + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + const MCSymbolRefExpr *getSymbolExpr() const { return SymExpr; } + + /// @} + + void PrintImpl(raw_ostream &OS) const override; + bool EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { + return false; + } + void visitUsedExpr(MCStreamer &Streamer) const override {}; + MCSection *FindAssociatedSection() const override { return nullptr; } + + // There are no TLS NVPTXMCExprs at the moment. 
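Editorial note: the new NVPTXGenericMCSymbolRefExpr above only changes how a symbol reference prints; the wrapped symbol is enclosed in generic(...) so the PTX output treats the address as a generic-space pointer. A reduced sketch of the printing behaviour:

    #include <ostream>
    #include <string>

    // Plain stand-in for PrintImpl: a symbol "foo" prints as "generic(foo)".
    static void printGeneric(std::ostream &OS, const std::string &Sym) {
      OS << "generic(" << Sym << ")";
    }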
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + }; } // end namespace llvm #endif diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index a1e1b9e..5fd69a6 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -48,9 +48,9 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() { char NVPTXPrologEpilogPass::ID = 0; bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - const TargetFrameLowering &TFI = *TM.getSubtargetImpl()->getFrameLowering(); - const TargetRegisterInfo &TRI = *TM.getSubtargetImpl()->getRegisterInfo(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetFrameLowering &TFI = *STI.getFrameLowering(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); bool Modified = false; calculateFrameObjectOffsets(MF); @@ -68,7 +68,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { } // Add function prolog/epilog - TFI.emitPrologue(MF); + TFI.emitPrologue(MF, MF.front()); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 358ccce..6e97f9e 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -71,15 +71,14 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { } } -NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st) - : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} +NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {} #define GET_REGINFO_TARGET_DESC #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers const MCPhysReg * -NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { static const MCPhysReg CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h index d2e6733..c310a9c 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -22,27 +22,20 @@ #include "NVPTXGenRegisterInfo.inc" namespace llvm { - -// Forward Declarations. 
-class TargetInstrInfo; -class NVPTXSubtarget; - class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { private: - bool Is64Bit; // Hold Strings that can be free'd all together with NVPTXRegisterInfo ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const NVPTXSubtarget &st); + NVPTXRegisterInfo(); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo //------------------------------------------------------ // NVPTX callee saved registers - const MCPhysReg * - getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index b7f53c7..e83f735 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -16,6 +16,7 @@ #include "NVPTX.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXSubtarget.h" +#include "NVPTXTargetMachine.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -142,8 +143,9 @@ findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) { case NVPTX::LD_i64_avar: { // The handle is a parameter value being loaded, replace with the // parameter symbol - const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>(); - if (ST.getDrvInterface() == NVPTX::CUDA) { + const NVPTXTargetMachine &TM = + static_cast<const NVPTXTargetMachine &>(MF.getTarget()); + if (TM.getDrvInterface() == NVPTX::CUDA) { // For CUDA, we preserve the param loads coming from function arguments return false; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h index f1d3cb4..0d2627d 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h @@ -26,7 +26,7 @@ namespace llvm { class NVPTXSection : public MCSection { virtual void anchor(); public: - NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {} + NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} virtual ~NVPTXSection() {} /// Override this as NVPTX has its own way of printing switching @@ -36,11 +36,8 @@ public: const MCExpr *Subsection) const override {} /// Base address of PTX sections is zero. - bool isBaseAddressKnownZero() const override { return true; } bool UseCodeAlign() const override { return false; } bool isVirtualSection() const override { return false; } - std::string getLabelBeginName() const override { return ""; } - std::string getLabelEndName() const override { return ""; } }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 3d52532..069d6e1 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXSubtarget.h" +#include "NVPTXTargetMachine.h" using namespace llvm; @@ -25,17 +26,6 @@ using namespace llvm; // Pin the vtable to this file. 
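Editorial note: because the driver-interface flag moves from the subtarget to the target machine in this merge, machine-level code such as NVPTXReplaceImageHandles above now downcasts MF.getTarget(). A sketch of the query, assuming the post-merge API:

    #include "NVPTXTargetMachine.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    static bool compilingForCUDA(const MachineFunction &MF) {
      const auto &TM = static_cast<const NVPTXTargetMachine &>(MF.getTarget());
      return TM.getDrvInterface() == NVPTX::CUDA;
    }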
void NVPTXSubtarget::anchor() {} -static std::string computeDataLayout(bool is64Bit) { - std::string Ret = "e"; - - if (!is64Bit) - Ret += "-p:32:32"; - - Ret += "-i64:64-v16:16-v32:32-n16:32:64"; - - return Ret; -} - NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // Provide the default CPU if we don't have one. @@ -54,18 +44,18 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, - bool is64Bit) - : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), - SmVersion(20), DL(computeDataLayout(is64Bit)), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo((const NVPTXTargetMachine &)TM), TSInfo(&DL), - FrameLowering(*this) { - - Triple T(TT); - - if (T.getOS() == Triple::NVCL) - drvInterface = NVPTX::NVCL; - else - drvInterface = NVPTX::CUDA; + const std::string &FS, + const NVPTXTargetMachine &TM) + : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM), + InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), + TSInfo(TM.getDataLayout()), FrameLowering() {} + +bool NVPTXSubtarget::hasImageHandles() const { + // Enable handles for Kepler+, where CUDA supports indirect surfaces and + // textures + if (TM.getDrvInterface() == NVPTX::CUDA) + return (SmVersion >= 30); + + // Disabled, otherwise + return false; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index fb2d404..e9833e5 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -32,8 +32,6 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { virtual void anchor(); std::string TargetName; - NVPTX::DrvInterface drvInterface; - bool Is64Bit; // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned PTXVersion; @@ -41,7 +39,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned int SmVersion; - const DataLayout DL; // Calculates type size & alignment + const NVPTXTargetMachine &TM; NVPTXInstrInfo InstrInfo; NVPTXTargetLowering TLInfo; TargetSelectionDAGInfo TSInfo; @@ -55,13 +53,12 @@ public: /// of the specified module. 
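Editorial note: the relocated hasImageHandles() above encodes a two-part condition: indirect surface/texture handles are only usable under the CUDA driver interface on sm_30 (Kepler) and newer. Restated as a tiny pure function:

    // True only for CUDA on Kepler-class (sm_30+) devices; NVCL and older
    // SM versions keep handles disabled.
    static bool imageHandlesEnabled(bool IsCUDA, unsigned SmVersion) {
      return IsCUDA && SmVersion >= 30;
    }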
/// NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, bool is64Bit); + const std::string &FS, const NVPTXTargetMachine &TM); const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL; } const NVPTXRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } @@ -95,20 +92,9 @@ public: } inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } - - bool hasImageHandles() const { - // Enable handles for Kepler+, where CUDA supports indirect surfaces and - // textures - if (getDrvInterface() == NVPTX::CUDA) - return (SmVersion >= 30); - - // Disabled, otherwise - return false; - } - bool is64Bit() const { return Is64Bit; } + bool hasImageHandles() const; unsigned int getSmVersion() const { return SmVersion; } - NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } std::string getTargetName() const { return TargetName; } unsigned getPTXVersion() const { return PTXVersion; } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index c7f9507..ac27c30 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -17,6 +17,7 @@ #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" #include "NVPTXTargetObjectFile.h" +#include "NVPTXTargetTransformInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -24,12 +25,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" @@ -49,6 +50,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); void initializeNVPTXLowerStructArgsPass(PassRegistry &); @@ -63,20 +65,37 @@ extern "C" void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. 
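Editorial note: for reference, the two strings the relocated computeDataLayout() can produce, read directly from its branches; the only difference between the 32- and 64-bit variants is the explicit 32-bit pointer spec:

    #include <string>

    // Mirrors computeDataLayout() as moved into NVPTXTargetMachine.cpp.
    static std::string nvptxLayout(bool Is64Bit) {
      std::string Ret = "e";
      if (!Is64Bit)
        Ret += "-p:32:32";
      return Ret + "-i64:64-v16:16-v32:32-n16:32:64";
    }

    // nvptxLayout(false) == "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
    // nvptxLayout(true)  == "e-i64:64-v16:16-v32:32-n16:32:64"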
initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); + initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); } +static std::string computeDataLayout(bool is64Bit) { + std::string Ret = "e"; + + if (!is64Bit) + Ret += "-p:32:32"; + + Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + + return Ret; +} + NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - TLOF(make_unique<NVPTXTargetObjectFile>()), - Subtarget(TT, CPU, FS, *this, is64bit) { + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this) { + if (Triple(TT).getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; initAsmInfo(); } @@ -124,12 +143,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } -void NVPTXTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our NVPTX pass. This - // allows the NVPTX pass to delegate to the target independent layer when - // appropriate. - PM.add(createBasicTargetTransformInfoPass(this)); - PM.add(createNVPTXTargetTransformInfoPass(this)); +TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis( + [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); } void NVPTXPassConfig::addIRPasses() { @@ -148,29 +164,27 @@ void NVPTXPassConfig::addIRPasses() { addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave + // them unused. We could remove dead code in an ad-hoc manner, but that + // requires manual work and might be error-prone. + addPass(createDeadCodeEliminationPass()); addPass(createSeparateConstOffsetFromGEPPass()); - // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used - // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates - // significantly better code than EarlyCSE for some of our benchmarks. + // ReassociateGEPs exposes more opportunites for SLSR. See + // the example in reassociate-geps-and-slsr.ll. + addPass(createStraightLineStrengthReducePass()); + // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or + // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE + // for some of our benchmarks. if (getOptLevel() == CodeGenOpt::Aggressive) addPass(createGVNPass()); else addPass(createEarlyCSEPass()); - // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave - // some dead code. We could remove dead code in an ad-hoc manner, but that - // requires manual work and might be error-prone. - // - // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, - // and leave them unused. 
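Editorial note: a concrete illustration of why the addIRPasses comments above run GVN/EarlyCSE after SeparateConstOffsetFromGEP: splitting constant offsets exposes a shared variadic base that CSE can compute once. A hand-written C++ analogue of the before/after:

    // Before the passes: A[I + 1] and A[I + 2] share no subexpression.
    // After SeparateConstOffsetFromGEP + CSE: one base, two constant offsets.
    static int sumPair(const int *A, int I) {
      const int *Base = A + I; // common part, materialized once
      return Base[1] + Base[2];
    }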
- // - // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the - // old index and some of its intermediate results may become unused. - addPass(createDeadCodeEliminationPass()); + // Run NaryReassociate after EarlyCSE/GVN to be more effective. + addPass(createNaryReassociatePass()); } bool NVPTXPassConfig::addInstSelector() { - const NVPTXSubtarget &ST = - getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); addPass(createLowerAggrCopies()); addPass(createAllocaHoisting()); diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index fa97ec8..2cd10e8 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -25,7 +25,9 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { + bool is64bit; std::unique_ptr<TargetLoweringObjectFile> TLOF; + NVPTX::DrvInterface drvInterface; NVPTXSubtarget Subtarget; // Hold Strings that can be free'd all together with NVPTXTargetMachine @@ -37,9 +39,12 @@ public: CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); ~NVPTXTargetMachine() override; - - const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } - + const NVPTXSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } + bool is64Bit() const { return is64bit; } + NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } ManagedStringPool *getManagedStrPool() const { return const_cast<ManagedStringPool *>(&ManagedStrPool); } @@ -47,7 +52,7 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; // Emission of machine code through MCJIT is not supported. - bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, + bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &, bool = true) override { return true; } @@ -55,8 +60,7 @@ public: return TLOF.get(); } - /// \brief Register NVPTX analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; }; // NVPTXTargetMachine. 
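Editorial note: the switch from addAnalysisPasses to getTargetIRAnalysis above moves NVPTX onto the new TTI mechanism: instead of registering separate analysis passes, the target hands back a per-function callback. A sketch of how a driver consumes it under the legacy pass manager (an assumed llc-style setup, not code from this patch):

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    static void addTTIWrapper(legacy::PassManager &PM, TargetMachine &TM) {
      // One wrapper pass now carries the per-function TTI callback.
      PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
    }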
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 00ceca5..5ecdc87 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -41,7 +41,6 @@ public: DwarfLocSection = nullptr; DwarfARangesSection = nullptr; DwarfRangesSection = nullptr; - DwarfMacroInfoSection = nullptr; } virtual ~NVPTXTargetObjectFile(); @@ -83,24 +82,22 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); - DwarfMacroInfoSection = - new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } - const MCSection *getSectionForConstant(SectionKind Kind, - const Constant *C) const override { + MCSection *getSectionForConstant(SectionKind Kind, + const Constant *C) const override { return ReadOnlySection; } - const MCSection *getExplicitSectionGlobal(const GlobalValue *GV, - SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const override { + MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override { return DataSection; } - const MCSection * - SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM) const override; + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b09d0d4..dc81802 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI pass ---------===// +//===-- NVPTXTargetTransformInfo.cpp - NVPTX specific TTI -----------------===// // // The LLVM Compiler Infrastructure // @@ -6,19 +6,13 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// \file -// This file implements a TargetTransformInfo analysis pass specific to the -// NVPTX target machine. It uses the target's detailed information to provide -// more precise answers to certain TTI queries, while letting the target -// independent and default TTI implementations handle the rest. -// -//===----------------------------------------------------------------------===// -#include "NVPTXTargetMachine.h" +#include "NVPTXTargetTransformInfo.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -26,69 +20,79 @@ using namespace llvm; #define DEBUG_TYPE "NVPTXtti" -// Declare the pass initialization routine locally as target-specific passes -// don't have a target-wide initialization entry point, and so we rely on the -// pass constructor initialization. 
-namespace llvm { -void initializeNVPTXTTIPass(PassRegistry &); -} - -namespace { - -class NVPTXTTI final : public ImmutablePass, public TargetTransformInfo { - const NVPTXTargetLowering *TLI; -public: - NVPTXTTI() : ImmutablePass(ID), TLI(nullptr) { - llvm_unreachable("This pass cannot be directly constructed"); - } - - NVPTXTTI(const NVPTXTargetMachine *TM) - : ImmutablePass(ID), TLI(TM->getSubtargetImpl()->getTargetLowering()) { - initializeNVPTXTTIPass(*PassRegistry::getPassRegistry()); +// Whether the given intrinsic reads threadIdx.x/y/z. +static bool readsThreadIndex(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_read_ptx_sreg_tid_x: + case Intrinsic::nvvm_read_ptx_sreg_tid_y: + case Intrinsic::nvvm_read_ptx_sreg_tid_z: + return true; } +} - void initializePass() override { pushTTIStack(this); } +static bool readsLaneId(const IntrinsicInst *II) { + return II->getIntrinsicID() == Intrinsic::ptx_read_laneid; +} - void getAnalysisUsage(AnalysisUsage &AU) const override { - TargetTransformInfo::getAnalysisUsage(AU); +// Whether the given intrinsic is an atomic instruction in PTX. +static bool isNVVMAtomic(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::nvvm_atomic_load_add_f32: + case Intrinsic::nvvm_atomic_load_inc_32: + case Intrinsic::nvvm_atomic_load_dec_32: + return true; } +} - /// Pass identification. - static char ID; - - /// Provide necessary pointer adjustments for the two base classes. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &TargetTransformInfo::ID) - return (TargetTransformInfo *)this; - return this; +bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { + // Without inter-procedural analysis, we conservatively assume that arguments + // to __device__ functions are divergent. + if (const Argument *Arg = dyn_cast<Argument>(V)) + return !isKernelFunction(*Arg->getParent()); + + if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Without pointer analysis, we conservatively assume values loaded from + // generic or local address space are divergent. + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { + unsigned AS = LI->getPointerAddressSpace(); + return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL; + } + // Atomic instructions may cause divergence. Atomic instructions are + // executed sequentially across all threads in a warp. Therefore, an earlier + // executed thread may see different memory inputs than a later executed + // thread. For example, suppose *a = 0 initially. + // + // atom.global.add.s32 d, [a], 1 + // + // returns 0 for the first thread that enters the critical region, and 1 for + // the second thread. + if (I->isAtomic()) + return true; + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + // Instructions that read threadIdx are obviously divergent. + if (readsThreadIndex(II) || readsLaneId(II)) + return true; + // Handle the NVPTX atomic instrinsics that cannot be represented as an + // atomic IR instruction. + if (isNVVMAtomic(II)) + return true; + } + // Conservatively consider the return value of function calls as divergent. + // We could analyze callees with bodies more precisely using + // inter-procedural analysis. 
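Editorial note summarizing the divergence rules in the isSourceOfDivergence hunk here: kernel arguments are uniform; device-function arguments, tid/laneid reads, atomics, generic/local-space loads, and call results are all conservatively divergent. A reduced classifier over a hypothetical value-kind enum standing in for the IR inspection:

    enum ValueKind {
      KernelArg, DeviceFnArg, TidOrLaneRead, AtomicOp,
      GenericOrLocalLoad, CallResult, Other
    };

    static bool mayDiverge(ValueKind K) {
      switch (K) {
      case DeviceFnArg:        // conservatively divergent without IPA
      case TidOrLaneRead:      // per-thread by definition
      case AtomicOp:           // ordering makes results thread-dependent
      case GenericOrLocalLoad: // no pointer analysis, assume divergent
      case CallResult:         // callee bodies not analyzed
        return true;
      default:
        return false;
      }
    }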
+ if (isa<CallInst>(I)) + return true; } - bool hasBranchDivergence() const override; - - unsigned getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const override; -}; - -} // end anonymous namespace - -INITIALIZE_AG_PASS(NVPTXTTI, TargetTransformInfo, "NVPTXtti", - "NVPTX Target Transform Info", true, true, false) -char NVPTXTTI::ID = 0; - -ImmutablePass * -llvm::createNVPTXTargetTransformInfoPass(const NVPTXTargetMachine *TM) { - return new NVPTXTTI(TM); + return false; } -bool NVPTXTTI::hasBranchDivergence() const { return true; } - -unsigned NVPTXTTI::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, - OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, - OperandValueProperties Opd2PropInfo) const { +unsigned NVPTXTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -96,8 +100,8 @@ unsigned NVPTXTTI::getArithmeticInstrCost( switch (ISD) { default: - return TargetTransformInfo::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: @@ -109,7 +113,7 @@ unsigned NVPTXTTI::getArithmeticInstrCost( if (LT.second.SimpleTy == MVT::i64) return 2 * LT.first; // Delegate other cases to the basic TTI. - return TargetTransformInfo::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); } } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h new file mode 100644 index 0000000..4280888 --- /dev/null +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -0,0 +1,76 @@ +//===-- NVPTXTargetTransformInfo.h - NVPTX specific TTI ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file a TargetTransformInfo::Concept conforming object specific to the +/// NVPTX target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. 
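Editorial note: the cost model hunk above doubles the reported cost for legal i64 arithmetic (ADD, MUL, the logical ops, and shifts) because NVPTX lowers them to multiple 32-bit machine operations, and defers everything else to the base implementation. A compressed restatement, ignoring the legalization plumbing (hypothetical helper):

    // Cost = 2 * legalization factor for i64 ALU ops; base cost otherwise.
    static unsigned nvptxALUCost(bool IsI64, unsigned LTFirst,
                                 unsigned BaseCost) {
      return IsI64 ? 2 * LTFirst : BaseCost;
    }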
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXTARGETTRANSFORMINFO_H + +#include "NVPTX.h" +#include "NVPTXTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { + typedef BasicTTIImplBase<NVPTXTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const NVPTXSubtarget *ST; + const NVPTXTargetLowering *TLI; + + const NVPTXSubtarget *getST() const { return ST; }; + const NVPTXTargetLowering *getTLI() const { return TLI; }; + +public: + explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM) + : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + NVPTXTTIImpl(const NVPTXTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + NVPTXTTIImpl(NVPTXTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + bool hasBranchDivergence() { return true; } + + bool isSourceOfDivergence(const Value *V); + + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index cf1feac..1f178af 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; bool retval = llvm::findOneNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); - if (retval == false) { + if (!retval) { // There is no NVVM metadata, check the calling convention - if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) - return true; - else - return false; + return F.getCallingConv() == llvm::CallingConv::PTX_Kernel; } return (x == 1); } @@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; bool retval = llvm::findAllNVVMAnnotation( &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); - if (retval == false) + if (!retval) return false; for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td index 85aa34e..a237247 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td @@ -735,19 +735,19 @@ def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), def ShuffleMask0 : SDNodeXForm<vector_shuffle, [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32); 
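Editorial note: the isKernelFunction() cleanup above keeps the same two-step logic: consult the NVVM annotation when present, otherwise fall back to the calling convention. Restated compactly (the signature is hypothetical):

    #include "llvm/IR/CallingConv.h"

    static bool isKernel(bool HasAnnotation, unsigned AnnotationValue,
                         llvm::CallingConv::ID CC) {
      if (!HasAnnotation)
        return CC == llvm::CallingConv::PTX_Kernel;
      return AnnotationValue == 1;
    }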
+ return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32); }]>; def ShuffleMask1 : SDNodeXForm<vector_shuffle, [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32); + return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32); }]>; def ShuffleMask2 : SDNodeXForm<vector_shuffle, [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32); + return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32); }]>; def ShuffleMask3 : SDNodeXForm<vector_shuffle, [{ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32); + return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32); }]>; // The spurious call is here to silence a compiler warning about N being diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp deleted file mode 100644 index 5f074b3..0000000 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. -// -//===----------------------------------------------------------------------===// - -#include "NVPTXutil.h" -#include "NVPTX.h" - -using namespace llvm; - -namespace llvm { - -bool isParamLoad(const MachineInstr *MI) { - if ((MI->getOpcode() != NVPTX::LD_i32_avar) && - (MI->getOpcode() != NVPTX::LD_i64_avar)) - return false; - if (MI->getOperand(2).isImm() == false) - return false; - if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM) - return false; - return true; -} - -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 - -static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { - char *a; - char *end = space + splen; - - a = space; - do { - unsigned char uc; - - if (a >= end) - return 1; - uc = val & DATA_MASK; - val >>= DIGIT_WIDTH; - if (val != 0) - uc |= MORE_BYTES; - *a = uc; - a++; - } while (val); - *nbytes = a - space; - return 0; -} - -#undef DATA_MASK -#undef DIGIT_WIDTH -#undef MORE_BYTES - -uint64_t encode_leb128(const char *str) { - union { - uint64_t x; - char a[8]; - } temp64; - - temp64.x = 0; - - for (unsigned i = 0, e = strlen(str); i != e; ++i) - temp64.a[i] = str[e - 1 - i]; - - char encoded[16]; - int nbytes; - - int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - - (void) retval; - assert(retval == 0 && "Encoding to leb128 failed"); - - assert(nbytes <= 8 && - "Cannot support register names with leb128 encoding > 8 bytes"); - - temp64.x = 0; - for (int i = 0; i < nbytes; ++i) - temp64.a[i] = encoded[i]; - - return temp64.x; -} - -} // end namespace llvm diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h b/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h deleted file mode 100644 index 1915dac..0000000 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXutil.h +++ /dev/null @@ -1,25 +0,0 @@ -//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
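Editorial note: the deleted NVPTXutil.cpp above carried a small ULEB128 encoder; for readers tracking what was removed, the core loop emits seven payload bits per byte and sets the high bit while more remain. An equivalent standalone version:

    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f; // low seven payload bits
        Value >>= 7;
        if (Value)
          Byte |= 0x80;              // continuation bit
        Out.push_back(Byte);
      } while (Value);
      return Out;
    }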
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the functions that can be used in CodeGen. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H -#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H - -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" - -namespace llvm { -bool isParamLoad(const MachineInstr *); -uint64_t encode_leb128(const char *str); -} - -#endif diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp index a8d6b95..5e375b7 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_os_ostream.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include <map> #include <sstream> @@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { // ConstantArray can be found successfully, see if it can be // found in VarMap. If so, replace the uses of CallInst with the // value found in VarMap. If not, replace the use with value 0. + + // IR for __nvvm_reflect calls differs between CUDA versions: + // CUDA 6.5 and earlier uses this sequence: + // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8 + // (i8 addrspace(4)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0)) + // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + // + // Value returned by Sym->getOperand(0) is a Constant with a + // ConstantDataSequential operand which can be converted to string and used + // for lookup. + // + // CUDA 7.0 does it slightly differently: + // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast + // (i8 addrspace(1)* getelementptr inbounds + // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*)) + // + // In this case, we get a Constant with a GlobalVariable operand and we need + // to dig deeper to find its initializer with the string we'll use for lookup. + for (User *U : ReflectFunction->users()) { assert(isa<CallInst>(U) && "Only a call instruction can use _reflect"); CallInst *Reflect = cast<CallInst>(U); @@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) { const Value *Sym = GEP->getOperand(0); assert(isa<Constant>(Sym) && "Format of _reflect function not recognized"); - const Constant *SymStr = cast<Constant>(Sym); + const Value *Operand = cast<Constant>(Sym)->getOperand(0); + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) { + // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's + // initializer. 
+      assert(GV->hasInitializer() &&
+             "Format of _reflect function not recognized");
+      const Constant *Initializer = GV->getInitializer();
+      Operand = Initializer;
+    }
-    assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+    assert(isa<ConstantDataSequential>(Operand) &&
            "Format of _reflect function not recognized");
-
-    assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+    assert(cast<ConstantDataSequential>(Operand)->isCString() &&
            "Format of _reflect function not recognized");
     std::string ReflectArg =
-        cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+        cast<ConstantDataSequential>(Operand)->getAsString();
     ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
     DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
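Editorial note: once the argument string is recovered (the hunk above), NVVMReflect substitutes the call with the mapped value, or 0 for unknown names, as the earlier comment in this file describes. A minimal sketch of that lookup (the map type is a hypothetical stand-in for the pass's variable map):

    #include <map>
    #include <string>

    static int reflectValue(const std::map<std::string, int> &VarMap,
                            const std::string &Arg) {
      auto It = VarMap.find(Arg);
      return It == VarMap.end() ? 0 : It->second; // unknown names fold to 0
    }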