diff options
Diffstat (limited to 'contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp | 145 |
1 files changed, 78 insertions, 67 deletions
diff --git a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp index 56ba719..a1da717 100644 --- a/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -22,6 +22,7 @@ #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Verifier.h" @@ -38,6 +39,7 @@ using namespace llvm; extern "C" void LLVMInitializeR600Target() { // Register the target RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); + RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { @@ -48,46 +50,20 @@ static MachineSchedRegistry SchedCustomRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler); -static std::string computeDataLayout(const AMDGPUSubtarget &ST) { - std::string Ret = "e-p:32:32"; - - if (ST.is64bit()) { - // 32-bit local, and region pointers. 64-bit private, global, and constant. - Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; - } - - Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" - "-v512:512-v1024:1024-v2048:2048-n32:64"; - - return Ret; -} - AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OptLevel -) -: - LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), - Subtarget(TT, CPU, FS), - Layout(computeDataLayout(Subtarget)), - FrameLowering(TargetFrameLowering::StackGrowsUp, - 64 * 16 // Maximum stack alignment (long16) - , 0), - IntrinsicInfo(this), - InstrItins(&Subtarget.getInstrItineraryData()) { - // TLInfo uses InstrInfo so it must be initialized after. 
- if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - TLInfo.reset(new R600TargetLowering(*this)); - } else { - TLInfo.reset(new SITargetLowering(*this)); - } + StringRef CPU, StringRef FS, + TargetOptions Options, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OptLevel) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), + TLOF(new TargetLoweringObjectFileELF()), + Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } AMDGPUTargetMachine::~AMDGPUTargetMachine() { + delete TLOF; } namespace { @@ -108,13 +84,14 @@ public: return nullptr; } - virtual void addCodeGenPrepare(); + void addIRPasses() override; + void addCodeGenPrepare() override; bool addPreISel() override; bool addInstSelector() override; - bool addPreRegAlloc() override; - bool addPostRegAlloc() override; - bool addPreSched2() override; - bool addPreEmitPass() override; + void addPreRegAlloc() override; + void addPostRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; }; } // End of anonymous namespace @@ -134,6 +111,19 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addIRPasses() { + // Function calls are not supported, so make sure we inline everything. + addPass(createAMDGPUAlwaysInlinePass()); + addPass(createAlwaysInlinerPass()); + // We need to add the barrier noop pass, otherwise adding the function + // inlining pass will cause all of the PassConfigs passes to be run + // one function at a time, which means if we have a module with two + // functions, then we will generate code for the first function + // without ever running any passes on the second. 
+ addPass(createBarrierNoopPass()); + TargetPassConfig::addIRPasses(); +} + void AMDGPUPassConfig::addCodeGenPrepare() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.isPromoteAllocaEnabled()) { @@ -161,61 +151,82 @@ AMDGPUPassConfig::addPreISel() { } bool AMDGPUPassConfig::addInstSelector() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); - addPass(createSILowerI1CopiesPass()); + + if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + addPass(createSILowerI1CopiesPass()); + addPass(createSIFixSGPRCopiesPass(*TM)); + addPass(createSIFoldOperandsPass()); + } + return false; } -bool AMDGPUPassConfig::addPreRegAlloc() { +void AMDGPUPassConfig::addPreRegAlloc() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createR600VectorRegMerger(*TM)); } else { - addPass(createSIFixSGPRCopiesPass(*TM)); - // SIFixSGPRCopies can generate a lot of duplicate instructions, - // so we need to run MachineCSE afterwards. - addPass(&MachineCSEID); - addPass(createSIShrinkInstructionsPass()); - initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry()); - insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID); + if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { + // Don't do this with no optimizations since it throws away debug info by + // merging nonadjacent loads. + + // This should be run after scheduling, but before register allocation. It + // also need extra copies to the address operand to be eliminated. 
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); + insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); + } + + addPass(createSIShrinkInstructionsPass(), false); + addPass(createSIFixSGPRLiveRangesPass(), false); } - return false; } -bool AMDGPUPassConfig::addPostRegAlloc() { +void AMDGPUPassConfig::addPostRegAlloc() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - addPass(createSIShrinkInstructionsPass()); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - addPass(createSIInsertWaits(*TM)); + addPass(createSIPrepareScratchRegs(), false); + addPass(createSIShrinkInstructionsPass(), false); } - return false; } -bool AMDGPUPassConfig::addPreSched2() { +void AMDGPUPassConfig::addPreSched2() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - addPass(createR600EmitClauseMarkers()); + addPass(createR600EmitClauseMarkers(), false); if (ST.isIfCvtEnabled()) - addPass(&IfConverterID); + addPass(&IfConverterID, false); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - addPass(createR600ClauseMergePass(*TM)); - return false; + addPass(createR600ClauseMergePass(*TM), false); + if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + addPass(createSIInsertWaits(*TM), false); + } } -bool AMDGPUPassConfig::addPreEmitPass() { +void AMDGPUPassConfig::addPreEmitPass() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - addPass(createAMDGPUCFGStructurizerPass()); - addPass(createR600ExpandSpecialInstrsPass(*TM)); - addPass(&FinalizeMachineBundlesID); - addPass(createR600Packetizer(*TM)); - addPass(createR600ControlFlowFinalizer(*TM)); + addPass(createAMDGPUCFGStructurizerPass(), false); + addPass(createR600ExpandSpecialInstrsPass(*TM), false); + addPass(&FinalizeMachineBundlesID, false); + addPass(createR600Packetizer(*TM), false); + 
addPass(createR600ControlFlowFinalizer(*TM), false); } else { - addPass(createSILowerControlFlowPass(*TM)); + addPass(createSILowerControlFlowPass(*TM), false); } - - return false; } + + +//===----------------------------------------------------------------------===// +// GCN Target Machine (SI+) +//===----------------------------------------------------------------------===// + +GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS, + StringRef CPU, TargetOptions Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : + AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { } |