summaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp9
-rw-r--r--lib/Transforms/Scalar/GVN.cpp391
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp5
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp43
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp35
-rw-r--r--lib/Transforms/Scalar/SROA.cpp31
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp17
7 files changed, 269 insertions, 262 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e..f0d29c8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address.
- DenseMap<Value*, Value*> SunkAddrs;
+ ValueMap<Value*, Value*> SunkAddrs;
/// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
/// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// start of the block.
CurInstIterator = BB->begin();
SunkAddrs.clear();
- } else {
- // This address is now available for reassignment, so erase the table
- // entry; we don't want to match some completely different instruction.
- SunkAddrs[Addr] = 0;
}
}
++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
if (!DefIsLiveOut)
return false;
- // Make sure non of the uses are PHI nodes.
+ // Make sure none of the uses are PHI nodes.
for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 129af8d..f350b9b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
//===----------------------------------------------------------------------===//
namespace {
+ class GVN;
+ struct AvailableValueInBlock {
+ /// BB - The basic block in question.
+ BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
+ /// V - The value that is live out of the block.
+ PointerIntPair<Value *, 2, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
+ unsigned Offset;
+
+ static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(isMemIntrinValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
+
+ /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+ /// defined here to the specified type. This handles various coercion cases.
+ Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ };
class GVN : public FunctionPass {
bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
BumpPtrAllocator TableAllocator;
SmallVector<Instruction*, 8> InstrsToErase;
+
+ typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+ typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+ typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@ namespace {
}
- // Helper fuctions
- // FIXME: eliminate or document these better
+ // Helper fuctions of redundant load elimination
bool processLoad(LoadInst *L);
- bool processInstruction(Instruction *I);
bool processNonLocalLoad(LoadInst *L);
+ void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+ bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks);
+
+ // Other helper routines
+ bool processInstruction(Instruction *I);
bool processBlock(BasicBlock *BB);
void dump(DenseMap<uint32_t, Value*> &d);
bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
return ConstantFoldLoadFromConstPtr(Src, &TD);
}
-namespace {
-
-struct AvailableValueInBlock {
- /// BB - The basic block in question.
- BasicBlock *BB;
- enum ValType {
- SimpleVal, // A simple offsetted value that is accessed.
- LoadVal, // A value produced by a load.
- MemIntrin // A memory intrinsic which is loaded from.
- };
-
- /// V - The value that is live out of the block.
- PointerIntPair<Value *, 2, ValType> Val;
-
- /// Offset - The byte offset in Val that is interesting for the load query.
- unsigned Offset;
-
- static AvailableValueInBlock get(BasicBlock *BB, Value *V,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(V);
- Res.Val.setInt(SimpleVal);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(MI);
- Res.Val.setInt(MemIntrin);
- Res.Offset = Offset;
- return Res;
- }
-
- static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
- unsigned Offset = 0) {
- AvailableValueInBlock Res;
- Res.BB = BB;
- Res.Val.setPointer(LI);
- Res.Val.setInt(LoadVal);
- Res.Offset = Offset;
- return Res;
- }
-
- bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
- bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
- bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
- Value *getSimpleValue() const {
- assert(isSimpleValue() && "Wrong accessor");
- return Val.getPointer();
- }
-
- LoadInst *getCoercedLoadValue() const {
- assert(isCoercedLoadValue() && "Wrong accessor");
- return cast<LoadInst>(Val.getPointer());
- }
-
- MemIntrinsic *getMemIntrinValue() const {
- assert(isMemIntrinValue() && "Wrong accessor");
- return cast<MemIntrinsic>(Val.getPointer());
- }
-
- /// MaterializeAdjustedValue - Emit code into this block to adjust the value
- /// defined here to the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
- Value *Res;
- if (isSimpleValue()) {
- Res = getSimpleValue();
- if (Res->getType() != LoadTy) {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *TD);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
- << *getSimpleValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else if (isCoercedLoadValue()) {
- LoadInst *Load = getCoercedLoadValue();
- if (Load->getType() == LoadTy && Offset == 0) {
- Res = Load;
- } else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
- gvn);
-
- DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
- << *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- } else {
- const DataLayout *TD = gvn.getDataLayout();
- assert(TD && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *TD);
- DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
- << " " << *getMemIntrinValue() << '\n'
- << *Res << '\n' << "\n\n\n");
- }
- return Res;
- }
-};
-
-} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+ Value *Res;
+ if (isSimpleValue()) {
+ Res = getSimpleValue();
+ if (Res->getType() != LoadTy) {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+ *TD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *getSimpleValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ gvn);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ } else {
+ const DataLayout *TD = gvn.getDataLayout();
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *getMemIntrinValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
+ return Res;
+}
+
static bool isLifetimeStart(const Instruction *Inst) {
if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
return II->getIntrinsicID() == Intrinsic::lifetime_start;
return false;
}
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
- // Find the non-local dependencies of the load.
- SmallVector<NonLocalDepResult, 64> Deps;
- AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
- MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
- //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
- // << Deps.size() << *LI << '\n');
-
- // If we had to process more than one hundred blocks to find the
- // dependencies, this load isn't worth worrying about. Optimizing
- // it will be too expensive.
- unsigned NumDeps = Deps.size();
- if (NumDeps > 100)
- return false;
-
- // If we had a phi translation failure, we'll have a single entry which is a
- // clobber in the current block. Reject this early.
- if (NumDeps == 1 &&
- !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
- DEBUG(
- dbgs() << "GVN: non-local load ";
- WriteAsOperand(dbgs(), LI);
- dbgs() << " has unknown dependencies\n";
- );
- return false;
- }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
- SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+ unsigned NumDeps = Deps.size();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
}
UnavailableBlocks.push_back(DepBB);
- continue;
}
+}
- // If we have no predecessors that produce a known value for this load, exit
- // early.
- if (ValuesPerBlock.empty()) return false;
-
- // If all of the instructions we depend on produce a known value for this
- // load, then it is fully redundant and we can use PHI insertion to compute
- // its value. Insert PHIs and remove the fully redundant value now.
- if (UnavailableBlocks.empty()) {
- DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
- // Perform PHI construction.
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
- LI->replaceAllUsesWith(V);
-
- if (isa<PHINode>(V))
- V->takeName(LI);
- if (V->getType()->getScalarType()->isPointerTy())
- MD->invalidateCachedPointerInfo(V);
- markInstructionForDeletion(LI);
- ++NumGVNLoad;
- return true;
- }
-
- if (!EnablePRE || !EnableLoadPRE)
- return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *LoadBB = LI->getParent();
BasicBlock *TmpBB = LoadBB;
- bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
TmpBB = TmpBB->getSinglePredecessor();
if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), TD);
Value *LoadPtr = 0;
- if (allSingleSucc) {
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
- *DT, NewInsts);
- } else {
- Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
- LoadPtr = Address.getAddr();
- }
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
break;
}
- // Make sure it is valid to move this load here. We have to watch out for:
- // @1 = getelementptr (i8* p, ...
- // test p and branch if == 0
- // load @1
- // It is valid to have the getelementptr before the test, even if p can
- // be 0, as getelementptr only does address arithmetic.
- // If we are not pushing the value through any multiple-successor blocks
- // we do not have this case. Otherwise, check that the load is safe to
- // put anywhere; this can be improved, but should be conservatively safe.
- if (!allSingleSucc &&
- // FIXME: REEVALUTE THIS.
- !isSafeToLoadUnconditionally(LoadPtr,
- UnavailablePred->getTerminator(),
- LI->getAlignment(), TD)) {
- CanDoPRE = false;
- break;
- }
-
I->second = LoadPtr;
}
@@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
return true;
}
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+ // Step 1: Find the non-local dependencies of the load.
+ LoadDepVect Deps;
+ AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+ MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+ // If we had to process more than one hundred blocks to find the
+ // dependencies, this load isn't worth worrying about. Optimizing
+ // it will be too expensive.
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
+ return false;
+
+ // If we had a phi translation failure, we'll have a single entry which is a
+ // clobber in the current block. Reject this early.
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+ DEBUG(
+ dbgs() << "GVN: non-local load ";
+ WriteAsOperand(dbgs(), LI);
+ dbgs() << " has unknown dependencies\n";
+ );
+ return false;
+ }
+
+ // Step 2: Analyze the availability of the load
+ AvailValInBlkVect ValuesPerBlock;
+ UnavailBlkVect UnavailableBlocks;
+ AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+ // If we have no predecessors that produce a known value for this load, exit
+ // early.
+ if (ValuesPerBlock.empty())
+ return false;
+
+ // Step 3: Eliminate fully redundancy.
+ //
+ // If all of the instructions we depend on produce a known value for this
+ // load, then it is fully redundant and we can use PHI insertion to compute
+ // its value. Insert PHIs and remove the fully redundant value now.
+ if (UnavailableBlocks.empty()) {
+ DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+ // Perform PHI construction.
+ Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+ LI->replaceAllUsesWith(V);
+
+ if (isa<PHINode>(V))
+ V->takeName(LI);
+ if (V->getType()->getScalarType()->isPointerTy())
+ MD->invalidateCachedPointerInfo(V);
+ markInstructionForDeletion(LI);
+ ++NumGVNLoad;
+ return true;
+ }
+
+ // Step 4: Eliminate partial redundancy.
+ if (!EnablePRE || !EnableLoadPRE)
+ return false;
+
+ return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 5d02c68..4796eb2 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
if (!GV || !GV->hasInitializer()) return;
// Should be an array of 'i8*'.
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0) return;
-
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (const GlobalVariable *G =
dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
- void simplifyLoopLatch(Loop *L);
- bool rotateLoop(Loop *L);
+ bool simplifyLoopLatch(Loop *L);
+ bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// loop exit.
- simplifyLoopLatch(L);
+ bool SimplifiedLatch = simplifyLoopLatch(L);
// One loop can be rotated multiple times.
bool MadeChange = false;
- while (rotateLoop(L))
+ while (rotateLoop(L, SimplifiedLatch)) {
MadeChange = true;
-
+ SimplifiedLatch = false;
+ }
return MadeChange;
}
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
/// canonical form so downstream passes can handle it.
///
/// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
BasicBlock *Latch = L->getLoopLatch();
if (!Latch || Latch->hasAddressTaken())
- return;
+ return false;
BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
if (!Jmp || !Jmp->isUnconditional())
- return;
+ return false;
BasicBlock *LastExit = Latch->getSinglePredecessor();
if (!LastExit || !L->isLoopExiting(LastExit))
- return;
+ return false;
BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
if (!BI)
- return;
+ return false;
if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
- return;
+ return false;
DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
<< LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->eraseNode(Latch);
Latch->eraseFromParent();
+ return true;
}
/// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+ if (OrigLatch == 0)
+ return false;
+
+ // Rotate if either the loop latch does *not* exit the loop, or if the loop
+ // latch was just simplified.
+ if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
return false;
// Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
++NumRotated;
return true;
}
-
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 7ee4027..a3c241d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -143,13 +143,9 @@ namespace {
// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
// than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
// "z" in the order of X-Y-Z is better than any other orders.
- class PtrSortFunctor {
- ArrayRef<XorOpnd> A;
-
- public:
- PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
- bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
- return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+ struct PtrSortFunctor {
+ bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+ return LHS->getSymbolicRank() < RHS->getSymbolicRank();
}
};
private:
@@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (X != Opnd2->getSymbolicPart())
return false;
- const APInt &C1 = Opnd1->getConstPart();
- const APInt &C2 = Opnd2->getConstPart();
-
// This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
int DeadInstNum = 1;
if (Opnd1->getValue()->hasOneUse())
@@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
if (Opnd2->isOrExpr())
std::swap(Opnd1, Opnd2);
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3((~C1) ^ C2);
// Do not increase code size!
@@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else if (Opnd1->isOrExpr()) {
// Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
// Do not increase code size
@@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
} else {
// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
//
+ const APInt &C1 = Opnd1->getConstPart();
+ const APInt &C2 = Opnd2->getConstPart();
APInt C3 = C1 ^ C2;
Res = createAndInstr(I, X, C3);
}
@@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return 0;
SmallVector<XorOpnd, 8> Opnds;
- SmallVector<unsigned, 8> OpndIndices;
+ SmallVector<XorOpnd*, 8> OpndPtrs;
Type *Ty = Ops[0].Op->getType();
APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
@@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I,
XorOpnd O(V);
O.setSymbolicRank(getRank(O.getSymbolicPart()));
Opnds.push_back(O);
- OpndIndices.push_back(Opnds.size() - 1);
} else
ConstOpnd ^= cast<ConstantInt>(V)->getValue();
}
+ // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds".
+ // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
+ // the "OpndPtrs" as well. For the similar reason, do not fuse this loop
+ // with the previous loop --- the iterator of the "Opnds" may be invalidated
+ // when new elements are added to the vector.
+ for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+ OpndPtrs.push_back(&Opnds[i]);
+
// Step 2: Sort the Xor-Operands in a way such that the operands containing
// the same symbolic value cluster together. For instance, the input operand
// sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
// ("x | 123", "x & 789", "y & 456").
- std::sort(OpndIndices.begin(), OpndIndices.end(),
- XorOpnd::PtrSortFunctor(Opnds));
+ std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
XorOpnd *PrevOpnd = 0;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
- XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+ XorOpnd *CurrOpnd = OpndPtrs[i];
// The combined value
Value *CV;
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f6bb365..d073e78 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".expand");
- DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
- if (i >= BeginIndex && i < EndIndex)
- Mask.push_back(IRB.getInt32(i));
- else
- Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
- V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
- Name + "insert");
- DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+ V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+ DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
@@ -2671,6 +2669,7 @@ private:
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset &&
canConvertValue(TD, V->getType(), NewAllocaTy)) {
V = convertValue(TD, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -3050,16 +3049,16 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
-
- // Find the operand we need to rewrite here.
- bool IsTrueVal = SI.getTrueValue() == OldPtr;
- if (IsTrueVal)
- assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
- else
- assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+ assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+ "Pointer isn't an operand!");
Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
- SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+ // Replace the operands which were using the old pointer.
+ if (SI.getOperand(1) == OldPtr)
+ SI.setOperand(1, NewPtr);
+ if (SI.getOperand(2) == OldPtr)
+ SI.setOperand(2, NewPtr);
+
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a37..bfde334 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
}
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
- if (!IdxVal) {
- // Non constant GEPs are only a problem on arrays, structs, and pointers
- // Vectors can be dynamically indexed.
- // FIXME: Add support for dynamic indexing on arrays. This should be
- // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
- // isn't.
- if (!(*GEPIt)->isVectorTy())
- return MarkUnsafe(Info, GEPI);
- NonConstant = true;
- NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
- }
+ if (!IdxVal)
+ return MarkUnsafe(Info, GEPI);
}
// Compute the offset due to this GEP and check if the alloca has a
OpenPOWER on IntegriCloud