diff options
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp new file mode 100644 index 0000000..aa931b1 --- /dev/null +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -0,0 +1,322 @@ +//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the NVPTX target. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXTargetMachine.h" +#include "MCTargetDesc/NVPTXMCAsmInfo.h" +#include "NVPTX.h" +#include "NVPTXAllocaHoisting.h" +#include "NVPTXLowerAggrCopies.h" +#include "NVPTXTargetObjectFile.h" +#include "NVPTXTargetTransformInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAllocaHoistingPass(PassRegistry &); +void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); +void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); +void initializeNVPTXLowerKernelArgsPass(PassRegistry &); +void initializeNVPTXLowerAllocaPass(PassRegistry &); +} + +extern "C" void LLVMInitializeNVPTXTarget() { + // Register the target. + RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); + RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); + + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeNVVMReflectPass(PR); + initializeGenericToNVVMPass(PR); + initializeNVPTXAllocaHoistingPass(PR); + initializeNVPTXAssignValidGlobalNamesPass(PR); + initializeNVPTXFavorNonGenericAddrSpacesPass(PR); + initializeNVPTXLowerKernelArgsPass(PR); + initializeNVPTXLowerAllocaPass(PR); + initializeNVPTXLowerAggrCopiesPass(PR); +} + +static std::string computeDataLayout(bool is64Bit) { + std::string Ret = "e"; + + if (!is64Bit) + Ret += "-p:32:32"; + + Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + + return Ret; +} + +NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, + CM, OL), + is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), + Subtarget(TT, CPU, FS, *this) { + if (TT.getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; + initAsmInfo(); +} + +NVPTXTargetMachine::~NVPTXTargetMachine() {} + +void NVPTXTargetMachine32::anchor() {} + +NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + +void NVPTXTargetMachine64::anchor() {} + +NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} + +namespace { +class NVPTXPassConfig : public TargetPassConfig { +public: + NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + NVPTXTargetMachine &getNVPTXTargetMachine() const { + return getTM<NVPTXTargetMachine>(); + } + + void addIRPasses() override; + bool addInstSelector() override; + void addPostRegAlloc() override; + void addMachineSSAOptimization() override; + + FunctionPass *createTargetRegisterAllocator(bool) override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; + +private: + // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. + void addEarlyCSEOrGVNPass(); +}; +} // end anonymous namespace + +TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { + NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); + return PassConfig; +} + +TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); + }); +} + +void NVPTXPassConfig::addEarlyCSEOrGVNPass() { + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(createGVNPass()); + else + addPass(createEarlyCSEPass()); +} + +void NVPTXPassConfig::addIRPasses() { + // The following passes are known to not play well with virtual regs hanging + // around after register allocation (which in our case, is *all* registers). + // We explicitly disable them here. We do, however, need some functionality + // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the + // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). + disablePass(&PrologEpilogCodeInserterID); + disablePass(&MachineCopyPropagationID); + disablePass(&TailDuplicateID); + + addPass(createNVVMReflectPass()); + addPass(createNVPTXImageOptimizerPass()); + addPass(createNVPTXAssignValidGlobalNamesPass()); + addPass(createGenericToNVVMPass()); + + // === Propagate special address spaces === + addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); + // NVPTXLowerKernelArgs emits alloca for byval parameters which can often + // be eliminated by SROA. + addPass(createSROAPass()); + addPass(createNVPTXLowerAllocaPass()); + addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave + // them unused. We could remove dead code in an ad-hoc manner, but that + // requires manual work and might be error-prone. + addPass(createDeadCodeEliminationPass()); + + // === Straight-line scalar optimizations === + addPass(createSeparateConstOffsetFromGEPPass()); + addPass(createSpeculativeExecutionPass()); + // ReassociateGEPs exposes more opportunites for SLSR. See + // the example in reassociate-geps-and-slsr.ll. + addPass(createStraightLineStrengthReducePass()); + // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or + // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE + // for some of our benchmarks. + addEarlyCSEOrGVNPass(); + // Run NaryReassociate after EarlyCSE/GVN to be more effective. + addPass(createNaryReassociatePass()); + // NaryReassociate on GEPs creates redundant common expressions, so run + // EarlyCSE after it. + addPass(createEarlyCSEPass()); + + // === LSR and other generic IR passes === + TargetPassConfig::addIRPasses(); + // EarlyCSE is not always strong enough to clean up what LSR produces. For + // example, GVN can combine + // + // %0 = add %a, %b + // %1 = add %b, %a + // + // and + // + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. + addEarlyCSEOrGVNPass(); +} + +bool NVPTXPassConfig::addInstSelector() { + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); + + addPass(createLowerAggrCopies()); + addPass(createAllocaHoisting()); + addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); + + if (!ST.hasImageHandles()) + addPass(createNVPTXReplaceImageHandlesPass()); + + return false; +} + +void NVPTXPassConfig::addPostRegAlloc() { + addPass(createNVPTXPrologEpilogPass(), false); + // NVPTXPrologEpilogPass calculates frame object offset and replace frame + // index with VRFrame register. NVPTXPeephole need to be run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); +} + +FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { + return nullptr; // No reg alloc +} + +void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + assert(!RegAllocPass && "NVPTX uses no regalloc!"); + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); +} + +void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + assert(!RegAllocPass && "NVPTX uses no regalloc!"); + + addPass(&ProcessImplicitDefsID); + addPass(&LiveVariablesID); + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); + + addPass(&TwoAddressInstructionPassID); + addPass(&RegisterCoalescerID); + + // PreRA instruction scheduling. + if (addPass(&MachineSchedulerID)) + printAndVerify("After Machine Scheduling"); + + + addPass(&StackSlotColoringID); + + // FIXME: Needs physical registers + //addPass(&PostRAMachineLICMID); + + printAndVerify("After StackSlotColoring"); +} + +void NVPTXPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. + if (addPass(&EarlyTailDuplicateID)) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(&OptimizePHIsID); + + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(&LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(&DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + if (addILPOpts()) + printAndVerify("After ILP optimizations"); + + addPass(&MachineLICMID); + addPass(&MachineCSEID); + + addPass(&MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(&PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} |