summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp269
1 files changed, 190 insertions, 79 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 029b44c..7ef062e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -263,7 +264,6 @@ namespace {
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
- Function &F;
const TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
DominatorTree &DT;
@@ -281,20 +281,37 @@ public:
/// that dominated values can succeed in their lookup.
ScopedHTType AvailableValues;
- /// \brief A scoped hash table of the current values of loads.
+ /// A scoped hash table of the current values of previously encounted memory
+ /// locations.
///
- /// This allows us to get efficient access to dominating loads when we have
- /// a fully redundant load. In addition to the most recent load, we keep
- /// track of a generation count of the read, which is compared against the
- /// current generation count. The current generation count is incremented
+ /// This allows us to get efficient access to dominating loads or stores when
+ /// we have a fully redundant load. In addition to the most recent load, we
+ /// keep track of a generation count of the read, which is compared against
+ /// the current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
- /// CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<
- BumpPtrAllocator,
- ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ /// CSE loads with other loads that have no intervening store. Ordering
+ /// events (such as fences or atomic instructions) increment the generation
+ /// count as well; essentially, we model these as writes to all possible
+ /// locations. Note that atomic and/or volatile loads and stores can be
+ /// present the table; it is the responsibility of the consumer to inspect
+ /// the atomicity/volatility if needed.
+ struct LoadValue {
+ Value *Data;
+ unsigned Generation;
+ int MatchingId;
+ bool IsAtomic;
+ LoadValue()
+ : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+ LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+ bool IsAtomic)
+ : Data(Data), Generation(Generation), MatchingId(MatchingId),
+ IsAtomic(IsAtomic) {}
+ };
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
- typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
- DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+ LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
@@ -308,10 +325,9 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
- const TargetTransformInfo &TTI, DominatorTree &DT,
- AssumptionCache &AC)
- : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
+ EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
+ DominatorTree &DT, AssumptionCache &AC)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
bool run();
@@ -382,57 +398,91 @@ private:
class ParseMemoryInst {
public:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
- : Load(false), Store(false), Vol(false), MayReadFromMemory(false),
- MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) {
- MayReadFromMemory = Inst->mayReadFromMemory();
- MayWriteToMemory = Inst->mayWriteToMemory();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- MemIntrinsicInfo Info;
- if (!TTI.getTgtMemIntrinsic(II, Info))
- return;
- if (Info.NumMemRefs == 1) {
- Store = Info.WriteMem;
- Load = Info.ReadMem;
- MatchingId = Info.MatchingId;
- MayReadFromMemory = Info.ReadMem;
- MayWriteToMemory = Info.WriteMem;
- Vol = Info.Vol;
- Ptr = Info.PtrVal;
- }
- } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Load = true;
- Vol = !LI->isSimple();
- Ptr = LI->getPointerOperand();
+ : IsTargetMemInst(false), Inst(Inst) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ IsTargetMemInst = true;
+ }
+ bool isLoad() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return isa<LoadInst>(Inst);
+ }
+ bool isStore() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return isa<StoreInst>(Inst);
+ }
+ bool isAtomic() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ return Inst->isAtomic();
+ }
+ bool isUnordered() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return true;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isUnordered();
+ }
+ // Conservative answer
+ return !Inst->isAtomic();
+ }
+
+ bool isVolatile() const {
+ if (IsTargetMemInst) {
+ assert(Info.IsSimple && "need to refine IsSimple in TTI");
+ return false;
+ }
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Store = true;
- Vol = !SI->isSimple();
- Ptr = SI->getPointerOperand();
+ return SI->isVolatile();
}
+ // Conservative answer
+ return true;
}
- bool isLoad() { return Load; }
- bool isStore() { return Store; }
- bool isVolatile() { return Vol; }
- bool isMatchingMemLoc(const ParseMemoryInst &Inst) {
- return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId;
+
+
+ bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+ return (getPointerOperand() == Inst.getPointerOperand() &&
+ getMatchingId() == Inst.getMatchingId());
}
- bool isValid() { return Ptr != nullptr; }
- int getMatchingId() { return MatchingId; }
- Value *getPtr() { return Ptr; }
- bool mayReadFromMemory() { return MayReadFromMemory; }
- bool mayWriteToMemory() { return MayWriteToMemory; }
+ bool isValid() const { return getPointerOperand() != nullptr; }
- private:
- bool Load;
- bool Store;
- bool Vol;
- bool MayReadFromMemory;
- bool MayWriteToMemory;
// For regular (non-intrinsic) loads/stores, this is set to -1. For
// intrinsic loads/stores, the id is retrieved from the corresponding
// field in the MemIntrinsicInfo structure. That field contains
// non-negative values only.
- int MatchingId;
- Value *Ptr;
+ int getMatchingId() const {
+ if (IsTargetMemInst) return Info.MatchingId;
+ return -1;
+ }
+ Value *getPointerOperand() const {
+ if (IsTargetMemInst) return Info.PtrVal;
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->getPointerOperand();
+ }
+ return nullptr;
+ }
+ bool mayReadFromMemory() const {
+ if (IsTargetMemInst) return Info.ReadMem;
+ return Inst->mayReadFromMemory();
+ }
+ bool mayWriteToMemory() const {
+ if (IsTargetMemInst) return Info.WriteMem;
+ return Inst->mayWriteToMemory();
+ }
+
+ private:
+ bool IsTargetMemInst;
+ MemIntrinsicInfo Info;
+ Instruction *Inst;
};
bool processNode(DomTreeNode *Node);
@@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = I++;
+ Instruction *Inst = &*I++;
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
@@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
ParseMemoryInst MemInst(Inst, TTI);
// If this is a non-volatile load, process it.
if (MemInst.isValid() && MemInst.isLoad()) {
- // Ignore volatile loads.
- if (MemInst.isVolatile()) {
+ // (conservatively) we can't peak past the ordering implied by this
+ // operation, but we can add this load to our set of available values
+ if (MemInst.isVolatile() || !MemInst.isUnordered()) {
LastStore = nullptr;
- // Don't CSE across synchronization boundaries.
- if (Inst->mayWriteToMemory())
- ++CurrentGeneration;
- continue;
+ ++CurrentGeneration;
}
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal =
- AvailableLoads.lookup(MemInst.getPtr());
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing loads with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ // We can't replace an atomic load with one which isn't also atomic.
+ InVal.IsAtomic >= MemInst.isAtomic()) {
+ Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.first << '\n');
+ << " to: " << *InVal.Data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
// Otherwise, remember that we have this instruction.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
LastStore = nullptr;
continue;
}
@@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // A release fence requires that all stores complete before it, but does
+ // not prevent the reordering of following loads 'before' the fence. As a
+ // result, we don't need to consider it as writing to memory and don't need
+ // to advance the generation. We do need to prevent DSE across the fence,
+ // but that's handled above.
+ if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+ if (FI->getOrdering() == Release) {
+ assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+ continue;
+ }
+
+ // write back DSE - If we write back the same value we just loaded from
+ // the same location and haven't passed any intervening writes or ordering
+ // operations, we can remove the write. The primary benefit is in allowing
+ // the available load table to remain valid and value forward past where
+ // the store originally was.
+ if (MemInst.isValid() && MemInst.isStore()) {
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+ if (InVal.Data &&
+ InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+ InVal.Generation == CurrentGeneration &&
+ InVal.MatchingId == MemInst.getMatchingId() &&
+ // We don't yet handle removing stores with ordering of any kind.
+ !MemInst.isVolatile() && MemInst.isUnordered()) {
+ assert((!LastStore ||
+ ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+ MemInst.getPointerOperand()) &&
+ "can't have an intervening store!");
+ DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+ Inst->eraseFromParent();
+ Changed = true;
+ ++NumDSE;
+ // We can avoid incrementing the generation count since we were able
+ // to eliminate this store.
+ continue;
+ }
+ }
+
// Okay, this isn't something we can CSE at all. Check to see if it is
// something that could modify memory. If so, our available memory values
// cannot be used so bump the generation count.
@@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (MemInst.isValid() && MemInst.isStore()) {
// We do a trivial form of DSE if there are two stores to the same
// location with no intervening loads. Delete the earlier store.
+ // At the moment, we don't remove ordered stores, but do remove
+ // unordered atomic stores. There's no special requirement (for
+ // unordered atomics) about removing atomic stores only in favor of
+ // other atomic stores since we we're going to execute the non-atomic
+ // one anyway and the atomic one might never have become visible.
if (LastStore) {
ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+ assert(LastStoreMemInst.isUnordered() &&
+ !LastStoreMemInst.isVolatile() &&
+ "Violated invariant");
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
@@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
-
- // Remember that this was the last store we saw for DSE.
- if (!MemInst.isVolatile())
+ AvailableLoads.insert(
+ MemInst.getPointerOperand(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+ MemInst.isAtomic()));
+
+ // Remember that this was the last unordered store we saw for DSE. We
+ // don't yet handle DSE on ordered or volatile stores since we don't
+ // have a good way to model the ordering requirement for following
+ // passes once the store is removed. We could insert a fence, but
+ // since fences are slightly stronger than stores in their ordering,
+ // it's not clear this is a profitable transform. Another option would
+ // be to merge the ordering with that of the post dominating store.
+ if (MemInst.isUnordered() && !MemInst.isVolatile())
LastStore = Inst;
+ else
+ LastStore = nullptr;
}
}
}
@@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -751,7 +861,7 @@ public:
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- EarlyCSE CSE(F, TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC);
return CSE.run();
}
@@ -761,6 +871,7 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
OpenPOWER on IntegriCloud