summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp')
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp60
1 files changed, 45 insertions, 15 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 248f9e1..aa931b1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
@@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
- initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
- initializeNVPTXFavorNonGenericAddrSpacesPass(
- *PassRegistry::getPassRegistry());
- initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
- initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeNVVMReflectPass(PR);
+ initializeGenericToNVVMPass(PR);
+ initializeNVPTXAllocaHoistingPass(PR);
+ initializeNVPTXAssignValidGlobalNamesPass(PR);
+ initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerAggrCopiesPass(PR);
}
static std::string computeDataLayout(bool is64Bit) {
@@ -139,6 +141,10 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+ // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ void addEarlyCSEOrGVNPass();
};
} // end anonymous namespace
@@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
});
}
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
- TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+
+ // === Propagate special address spaces ===
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
+
+ // === Straight-line scalar optimizations ===
addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
// EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
// for some of our benchmarks.
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(createGVNPass());
- else
- addPass(createEarlyCSEPass());
+ addEarlyCSEOrGVNPass();
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
addPass(createNaryReassociatePass());
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+
+ // === LSR and other generic IR passes ===
+ TargetPassConfig::addIRPasses();
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
+ //
+ // %0 = add %a, %b
+ // %1 = add %b, %a
+ //
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
OpenPOWER on IntegriCloud