| author | rdivacky <rdivacky@FreeBSD.org> | 2010-01-01 10:31:22 +0000 |
|---|---|---|
| committer | rdivacky <rdivacky@FreeBSD.org> | 2010-01-01 10:31:22 +0000 |
| commit | a16c51cee9225a354c999dd1076d5dba2aa79807 (patch) | |
| tree | dba00119388b84f9f44e6ec5e9129f807fd79ca3 /lib/Transforms | |
| parent | 40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (diff) | |
| download | FreeBSD-src-a16c51cee9225a354c999dd1076d5dba2aa79807.zip, FreeBSD-src-a16c51cee9225a354c999dd1076d5dba2aa79807.tar.gz | |
Update LLVM to 92395.
Diffstat (limited to 'lib/Transforms')
22 files changed, 1403 insertions, 1000 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 91534a7..eac4e17 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -56,7 +56,7 @@ namespace { // We don't modify the program, so we preserve all analyses virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - }; + } }; } diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0b5e007..ae88d9e 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -24,7 +24,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" @@ -220,17 +219,14 @@ static bool StripDebugInfo(Module &M) { Changed = true; NMD->eraseFromParent(); } - MetadataContext &TheMetadata = M.getContext().getMetadata(); - unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); - if (!MDDbgKind) - return Changed; - + + unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) - TheMetadata.removeMD(MDDbgKind, BI); + BI->setMetadata(MDDbgKind, 0); return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index e4c4ae5..372616c 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -48,7 +48,7 @@ namespace { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; - ProfileInfo *PI; + ProfileInfo *PFI; /// BackEdges - Keep a set of all the loop back edges. /// @@ -99,7 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) { bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; - PI = getAnalysisIfAvailable<ProfileInfo>(); + PFI = getAnalysisIfAvailable<ProfileInfo>(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -288,9 +288,9 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (PI) { - PI->replaceAllUses(BB, DestBB); - PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); + if (PFI) { + PFI->replaceAllUses(BB, DestBB); + PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); } BB->eraseFromParent(); @@ -368,9 +368,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, // If we found a workable predecessor, change TI to branch to Succ. 
if (FoundMatch) { - ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PI) - PI->splitEdge(TIBB, Dest, Pred); + ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PFI) + PFI->splitEdge(TIBB, Dest, Pred); Dest->removePredecessor(TIBB); TI->setSuccessor(SuccNum, Pred); return; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 222792b..612b415 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -20,6 +20,7 @@ #include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" @@ -48,7 +49,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" -#include <cstdio> using namespace llvm; STATISTIC(NumGVNInstr, "Number of instructions deleted"); @@ -733,13 +733,13 @@ static RegisterPass<GVN> X("gvn", "Global Value Numbering"); void GVN::dump(DenseMap<uint32_t, Value*>& d) { - printf("{\n"); + errs() << "{\n"; for (DenseMap<uint32_t, Value*>::iterator I = d.begin(), E = d.end(); I != E; ++I) { - printf("%d\n", I->first); + errs() << I->first << "\n"; I->second->dump(); } - printf("}\n"); + errs() << "}\n"; } static bool isSafeReplacement(PHINode* p, Instruction *inst) { @@ -1278,6 +1278,32 @@ struct AvailableValueInBlock { assert(!isSimpleValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } + + /// MaterializeAdjustedValue - Emit code into this block to adjust the value + /// defined here to the specified type. This handles various coercion cases. + Value *MaterializeAdjustedValue(const Type *LoadTy, + const TargetData *TD) const { + Value *Res; + if (isSimpleValue()) { + Res = getSimpleValue(); + if (Res->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), + *TD); + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *getSimpleValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else { + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *getMemIntrinValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + return Res; + } }; /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, @@ -1286,7 +1312,15 @@ struct AvailableValueInBlock { static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, const TargetData *TD, + const DominatorTree &DT, AliasAnalysis *AA) { + // Check for the fully redundant, dominating load case. In this case, we can + // just use the dominating value directly. + if (ValuesPerBlock.size() == 1 && + DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + + // Otherwise, we have to construct SSA form. 
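(The ConstructSSAForLoadSet hunk resumes below. The `MaterializeAdjustedValue` helper factored out above leans on `GetStoreValueForLoad` to coerce a wider stored value into the loaded type at a byte offset. A minimal standalone sketch of that coercion, assuming a little-endian layout; `getStoreValueForLoad` here is a hypothetical stand-in, not the LLVM function:)

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// Toy model of the coercion GVN performs when a load is fully satisfied
// by a wider store: reinterpret the relevant bytes of the stored value as
// the loaded type. Little-endian layout is assumed; the real
// GetStoreValueForLoad consults TargetData instead of hard-coding this.
template <typename LoadT, typename StoreT>
LoadT getStoreValueForLoad(StoreT Stored, unsigned Offset) {
  assert(Offset + sizeof(LoadT) <= sizeof(StoreT) && "load reads past store");
  LoadT Res;
  std::memcpy(&Res, reinterpret_cast<const char *>(&Stored) + Offset,
              sizeof(Res));
  return Res;
}

int main() {
  uint32_t Stored = 0x11223344;
  // Byte 1 of the little-endian store 44 33 22 11 is 0x33.
  return getStoreValueForLoad<uint8_t>(Stored, 1) == 0x33 ? 0 : 1;
}
```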
SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI); @@ -1300,28 +1334,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - unsigned Offset = AV.Offset; - - Value *AvailableVal; - if (AV.isSimpleValue()) { - AvailableVal = AV.getSimpleValue(); - if (AvailableVal->getType() != LoadTy) { - assert(TD && "Need target data to handle type mismatch case"); - AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, - BB->getTerminator(), *TD); - - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " - << *AV.getSimpleValue() << '\n' - << *AvailableVal << '\n' << "\n\n\n"); - } - } else { - AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset, - LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset - << " " << *AV.getMemIntrinValue() << '\n' - << *AvailableVal << '\n' << "\n\n\n"); - } - SSAUpdate.AddAvailableValue(BB, AvailableVal); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); } // Perform PHI construction. @@ -1346,7 +1359,7 @@ static bool isLifetimeStart(Instruction *Inst) { bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. - SmallVector<NonLocalDepEntry, 64> Deps; + SmallVector<NonLocalDepResult, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " @@ -1490,7 +1503,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, VN.getAliasAnalysis()); LI->replaceAllUsesWith(V); @@ -1679,7 +1692,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, VN.getAliasAnalysis()); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 2912421..3aa4fd3 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -258,7 +258,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // Check that InVal is defined in the loop. Instruction *Inst = cast<Instruction>(InVal); - if (!L->contains(Inst->getParent())) + if (!L->contains(Inst)) continue; // Okay, this instruction has a user outside of the current loop diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index b9c536f..516d72e 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -75,6 +75,15 @@ STATISTIC(NumDeadInst , "Number of dead inst eliminated"); STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); +/// SelectPatternFlavor - We can match a variety of different patterns for +/// select operations. +enum SelectPatternFlavor { + SPF_UNKNOWN = 0, + SPF_SMIN, SPF_UMIN, + SPF_SMAX, SPF_UMAX + //SPF_ABS - TODO. +}; + namespace { /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. 
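(Aside: the `SelectPatternFlavor` enum added above feeds `MatchSelectPattern`, which appears in a later hunk. A self-contained sketch of the classification it performs — `classify` is an illustrative stand-in, with the compare reduced to a predicate plus a flag saying whether the select's true arm is the compare's LHS:)

```cpp
#include <cstdio>

enum SelectPatternFlavor { SPF_UNKNOWN, SPF_SMIN, SPF_UMIN, SPF_SMAX, SPF_UMAX };
enum Pred { SLT, SLE, SGT, SGE, ULT, ULE, UGT, UGE };

// "cmp(L, R) ? T : F" is a min/max when {T, F} == {L, R}. TrueIsLHS says
// the true arm is the compare's LHS; swapping the arms flips min and max.
SelectPatternFlavor classify(Pred P, bool TrueIsLHS) {
  switch (P) {
  case SGT: case SGE: return TrueIsLHS ? SPF_SMAX : SPF_SMIN;
  case SLT: case SLE: return TrueIsLHS ? SPF_SMIN : SPF_SMAX;
  case UGT: case UGE: return TrueIsLHS ? SPF_UMAX : SPF_UMIN;
  case ULT: case ULE: return TrueIsLHS ? SPF_UMIN : SPF_UMAX;
  }
  return SPF_UNKNOWN;
}

int main() {
  // (a > b) ? a : b is a signed max; (a > b) ? b : a is a signed min.
  std::printf("%d %d\n", classify(SGT, true) == SPF_SMAX,
              classify(SGT, false) == SPF_SMIN);
}
```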
@@ -257,7 +266,8 @@ namespace { ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); - + Instruction *FoldICmpAddOpCst(ICmpInst &ICI, Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred, Value *TheAdd); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, @@ -280,6 +290,9 @@ namespace { Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, + Value *A, Value *B, Instruction &Outer, + SelectPatternFlavor SPF2, Value *C); Instruction *visitSelectInst(SelectInst &SI); Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI); Instruction *visitCallInst(CallInst &CI); @@ -648,6 +661,57 @@ static inline Value *dyn_castFNegVal(Value *V) { return 0; } +/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, +/// returning the kind and providing the out parameter results if we +/// successfully match. +static SelectPatternFlavor +MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (SI == 0) return SPF_UNKNOWN; + + ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); + if (ICI == 0) return SPF_UNKNOWN; + + LHS = ICI->getOperand(0); + RHS = ICI->getOperand(1); + + // (icmp X, Y) ? X : Y + if (SI->getTrueValue() == ICI->getOperand(0) && + SI->getFalseValue() == ICI->getOperand(1)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMAX; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMAX; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMIN; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMIN; + } + } + + // (icmp X, Y) ? Y : X + if (SI->getTrueValue() == ICI->getOperand(1) && + SI->getFalseValue() == ICI->getOperand(0)) { + switch (ICI->getPredicate()) { + default: return SPF_UNKNOWN; // Equality. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5) + + return SPF_UNKNOWN; +} + /// isFreeToInvert - Return true if the specified value is free to invert (apply /// ~ to). This happens in cases where the ~ can be eliminated. static inline bool isFreeToInvert(Value *V) { @@ -732,12 +796,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { APInt MulExt = LHSExt * RHSExt; - if (sign) { - APInt Min = APInt::getSignedMinValue(W).sext(W * 2); - APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); - return MulExt.slt(Min) || MulExt.sgt(Max); - } else + if (!sign) return MulExt.ugt(APInt::getLowBitsSet(W * 2, W)); + + APInt Min = APInt::getSignedMinValue(W).sext(W * 2); + APInt Max = APInt::getSignedMaxValue(W).sext(W * 2); + return MulExt.slt(Min) || MulExt.sgt(Max); } @@ -2736,9 +2800,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Op0 == Op1) // sub X, X -> 0 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - // If this is a 'B = x-(-A)', change to B = x+A. 
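(Aside before the visitSub hunk resumes: the `MultiplyOverflows` cleanup above reorders the early-exit but keeps the algorithm — extend both operands to twice the bit width, multiply exactly, and test whether the product still fits. A standalone miniature for the signed case, with `mulOverflowsS8` as a hypothetical name:)

```cpp
#include <cstdint>
#include <cstdio>

// MultiplyOverflows in miniature: sign-extend both operands to twice the
// width, multiply exactly, and test the result against the narrow bounds.
bool mulOverflowsS8(int8_t A, int8_t B) {
  int16_t Wide = int16_t(A) * int16_t(B);
  return Wide < INT8_MIN || Wide > INT8_MAX;
}

int main() {
  std::printf("%d %d\n", mulOverflowsS8(16, 8),  // 128 overflows int8_t
              mulOverflowsS8(-8, 16));           // -128 still fits
}
```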
- if (Value *V = dyn_castNegVal(Op1)) - return BinaryOperator::CreateAdd(Op0, V); + // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. + if (Value *V = dyn_castNegVal(Op1)) { + BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); + Res->setHasNoSignedWrap(I.hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Res; + } if (isa<UndefValue>(Op0)) return ReplaceInstUsesWith(I, Op0); // undef - X -> undef @@ -6356,24 +6424,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // comparison into the select arms, which will cause one to be // constant folded and the select turned into a bitwise or. Value *Op1 = 0, *Op2 = 0; - if (LHSI->hasOneUse()) { - if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) { - // Fold the known value into the constant operand. - Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), - RHSC, I.getName()); - } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) { - // Fold the known value into the constant operand. - Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); - // Insert a new ICmp of the other select operand. + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); + + // We only want to perform this transformation if it will not lead to + // additional code. This is true if either both sides of the select + // fold to a constant (in which case the icmp is replaced with a select + // which will usually simplify) or this is the only user of the + // select (in which case we are trading a select+icmp for a simpler + // select+icmp). + if ((Op1 && Op2) || (LHSI->hasOneUse() && (Op1 || Op2))) { + if (!Op1) Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, I.getName()); - } - } - - if (Op1) + if (!Op2) + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); + } break; } case Instruction::Call: @@ -6452,7 +6522,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if (X) ... // For generality, we handle any zero-extension of any operand comparison // with a constant or another cast from the same type. - if (isa<ConstantInt>(Op1) || isa<CastInst>(Op1)) + if (isa<Constant>(Op1) || isa<CastInst>(Op1)) if (Instruction *R = visitICmpInstWithCastAndCast(I)) return R; } @@ -6598,9 +6668,112 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } } + + { + Value *X; ConstantInt *Cst; + // icmp X+Cst, X + if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getPredicate(), Op0); + + // icmp X, X+Cst + if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) + return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate(), Op1); + } return Changed ? &I : 0; } +/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". +Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, + Value *X, ConstantInt *CI, + ICmpInst::Predicate Pred, + Value *TheAdd) { + // If we have X+0, exit early (simplifying logic below) and let it get folded + // elsewhere. 
icmp X+0, X -> icmp X, X + if (CI->isZero()) { + bool isTrue = ICmpInst::isTrueWhenEqual(Pred); + return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); + } + + // (X+4) == X -> false. + if (Pred == ICmpInst::ICMP_EQ) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); + + // (X+4) != X -> true. + if (Pred == ICmpInst::ICMP_NE) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + + // If this is an instruction (as opposed to constantexpr) get NUW/NSW info. + bool isNUW = false, isNSW = false; + if (BinaryOperator *Add = dyn_cast<BinaryOperator>(TheAdd)) { + isNUW = Add->hasNoUnsignedWrap(); + isNSW = Add->hasNoSignedWrap(); + } + + // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, + // so the values can never be equal. Similiarly for all other "or equals" + // operators. + + // (X+1) <u X --> X >u (MAXUINT-1) --> X != 255 + // (X+2) <u X --> X >u (MAXUINT-2) --> X > 253 + // (X+MAXUINT) <u X --> X >u (MAXUINT-MAXUINT) --> X != 0 + if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) { + // If this is an NUW add, then this is always false. + if (isNUW) + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(X->getContext())); + + Value *R = ConstantExpr::getSub(ConstantInt::get(CI->getType(), -1ULL), CI); + return new ICmpInst(ICmpInst::ICMP_UGT, X, R); + } + + // (X+1) >u X --> X <u (0-1) --> X != 255 + // (X+2) >u X --> X <u (0-2) --> X <u 254 + // (X+MAXUINT) >u X --> X <u (0-MAXUINT) --> X <u 1 --> X == 0 + if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) { + // If this is an NUW add, then this is always true. + if (isNUW) + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); + return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantExpr::getNeg(CI)); + } + + unsigned BitWidth = CI->getType()->getPrimitiveSizeInBits(); + ConstantInt *SMax = ConstantInt::get(X->getContext(), + APInt::getSignedMaxValue(BitWidth)); + + // (X+ 1) <s X --> X >s (MAXSINT-1) --> X == 127 + // (X+ 2) <s X --> X >s (MAXSINT-2) --> X >s 125 + // (X+MAXSINT) <s X --> X >s (MAXSINT-MAXSINT) --> X >s 0 + // (X+MINSINT) <s X --> X >s (MAXSINT-MINSINT) --> X >s -1 + // (X+ -2) <s X --> X >s (MAXSINT- -2) --> X >s 126 + // (X+ -1) <s X --> X >s (MAXSINT- -1) --> X != 127 + if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) { + // If this is an NSW add, then we have two cases: if the constant is + // positive, then this is always false, if negative, this is always true. + if (isNSW) { + bool isTrue = CI->getValue().isNegative(); + return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); + } + + return new ICmpInst(ICmpInst::ICMP_SGT, X, ConstantExpr::getSub(SMax, CI)); + } + + // (X+ 1) >s X --> X <s (MAXSINT-(1-1)) --> X != 127 + // (X+ 2) >s X --> X <s (MAXSINT-(2-1)) --> X <s 126 + // (X+MAXSINT) >s X --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1 + // (X+MINSINT) >s X --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2 + // (X+ -2) >s X --> X <s (MAXSINT-(-2-1)) --> X <s -126 + // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 + + // If this is an NSW add, then we have two cases: if the constant is + // positive, then this is always true, if negative, this is always false. 
+ if (isNSW) { + bool isTrue = !CI->getValue().isNegative(); + return ReplaceInstUsesWith(ICI, ConstantInt::get(ICI.getType(), isTrue)); + } + + assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); + Constant *C = ConstantInt::get(X->getContext(), CI->getValue()-1); + return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); +} /// FoldICmpDivCst - Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS /// and CmpRHS are both known to be integer constants. @@ -7075,8 +7248,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; case Instruction::Add: - // Fold: icmp pred (add, X, C1), C2 - + // Fold: icmp pred (add X, C1), C2 if (!ICI.isEquality()) { ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!LHSC) break; @@ -7299,19 +7471,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // If the re-extended constant didn't change... if (Res2 == CI) { - // Make sure that sign of the Cmp and the sign of the Cast are the same. - // For example, we might have: - // %A = sext i16 %X to i32 - // %B = icmp ugt i32 %A, 1330 - // It is incorrect to transform this into - // %B = icmp ugt i16 %X, 1330 - // because %A may have negative value. - // - // However, we allow this when the compare is EQ/NE, because they are - // signless. - if (isSignedExt == isSignedCmp || ICI.isEquality()) + // Deal with equality cases early. + if (ICI.isEquality()) return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); - return 0; + + // A signed comparison of sign extended values simplifies into a + // signed comparison. + if (isSignedExt && isSignedCmp) + return new ICmpInst(ICI.getPredicate(), LHSCIOp, Res1); + + // The other three cases all fold into an unsigned comparison. + return new ICmpInst(ICI.getUnsignedPredicate(), LHSCIOp, Res1); } // The re-extended constant changed so the constant cannot be represented @@ -9372,9 +9542,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return ReplaceInstUsesWith(SI, TrueVal); /// NOTE: if we wanted to, this is where to detect integer MIN/MAX } - - /// NOTE: if we wanted to, this is where to detect integer ABS - return Changed ? &SI : 0; } @@ -9416,6 +9583,35 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, return false; } +/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: +/// SPF2(SPF1(A, B), C) +Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, + SelectPatternFlavor SPF1, + Value *A, Value *B, + Instruction &Outer, + SelectPatternFlavor SPF2, Value *C) { + if (C == A || C == B) { + // MAX(MAX(A, B), B) -> MAX(A, B) + // MIN(MIN(a, b), a) -> MIN(a, b) + if (SPF1 == SPF2) + return ReplaceInstUsesWith(Outer, Inner); + + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + if ((SPF1 == SPF_SMIN && SPF2 == SPF_SMAX) || + (SPF1 == SPF_SMAX && SPF2 == SPF_SMIN) || + (SPF1 == SPF_UMIN && SPF2 == SPF_UMAX) || + (SPF1 == SPF_UMAX && SPF2 == SPF_UMIN)) + return ReplaceInstUsesWith(Outer, C); + } + + // TODO: MIN(MIN(A, 23), 97) + return 0; +} + + + + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -9622,9 +9818,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // See if we can fold the select into one of our operands. 
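(The visitSelectInst hunks continue below. First, a brute-force check of the unsigned fold performed by `FoldICmpAddOpCst` above, over all of `i8` — a sketch in plain C++, not LLVM code:)

```cpp
#include <cstdint>
#include <cstdio>

// Exhaustively verify the fold "(X + C) <u X  -->  X >u (MAXUINT - C)"
// for every i8 value of X and every nonzero constant C.
int main() {
  for (unsigned C = 1; C < 256; ++C)
    for (unsigned X = 0; X < 256; ++X) {
      bool Original = uint8_t(X + C) < uint8_t(X);
      bool Folded   = uint8_t(X) > uint8_t(0xFFu - C);
      if (Original != Folded) {
        std::puts("mismatch");
        return 1;
      }
    }
  std::puts("ok");
  return 0;
}
```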
if (SI.getType()->isInteger()) { - Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal); - if (FoldI) + if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; + + // MAX(MAX(a, b), a) -> MAX(a, b) + // MIN(MIN(a, b), a) -> MIN(a, b) + // MAX(MIN(a, b), a) -> a + // MIN(MAX(a, b), a) -> a + Value *LHS, *RHS, *LHS2, *RHS2; + if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) { + if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, + SI, SPF, RHS)) + return R; + if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2)) + if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2, + SI, SPF, LHS)) + return R; + } + + // TODO. + // ABS(-X) -> ABS(X) + // ABS(ABS(X)) -> ABS(X) } // See if we can fold the select into a phi node if the condition is a select. @@ -9896,9 +10111,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); Changed = true; } + } + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove(x,x,size) -> noop. - if (MMI->getSource() == MMI->getDest()) + if (MTI->getSource() == MTI->getDest()) return EraseInstFromFunction(CI); } @@ -9923,6 +10140,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Operand->getIntrinsicID() == Intrinsic::bswap) return ReplaceInstUsesWith(CI, Operand->getOperand(1)); break; + case Intrinsic::powi: + if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) { + // powi(x, 0) -> 1.0 + if (Power->isZero()) + return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); + // powi(x, 1) -> x + if (Power->isOne()) + return ReplaceInstUsesWith(CI, II->getOperand(1)); + // powi(x, -1) -> 1/x + if (Power->isAllOnesValue()) + return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), + II->getOperand(1)); + } + break; + case Intrinsic::uadd_with_overflow: { Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType()); @@ -11232,6 +11464,23 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) { PHINode *PN = PHIsToSlice[PHIId]; + // Scan the input list of the PHI. If any input is an invoke, and if the + // input is defined in the predecessor, then we won't be split the critical + // edge which is required to insert a truncate. Because of this, we have to + // bail out. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i)); + if (II == 0) continue; + if (II->getParent() != PN->getIncomingBlock(i)) + continue; + + // If we have a phi, and if it's directly in the predecessor, then we have + // a critical edge where we need to put the truncate. Since we can't + // split the edge in instcombine, we have to bail out. + return 0; + } + + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); @@ -11314,7 +11563,9 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { PredVal = EltPHI; EltPHI->addIncoming(PredVal, Pred); continue; - } else if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { + } + + if (PHINode *InPHI = dyn_cast<PHINode>(PN)) { // If the incoming value was a PHI, and if it was one of the PHIs we // already rewrote it, just use the lowered value. 
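(The SliceUpIllegalIntegerPHI hunk resumes below. The min/max identities folded by `FoldSPFofSPF` above are easy to sanity-check with the standard `std::min`/`std::max` over a small range:)

```cpp
#include <algorithm>
#include <cassert>

// Check the FoldSPFofSPF identities on a small range:
//   MAX(MAX(a, b), b) == MAX(a, b)   (same flavor collapses)
//   MIN(MAX(a, b), a) == a           (opposite flavors yield the
//   MAX(MIN(a, b), a) == a            repeated operand)
int main() {
  for (int A = -3; A <= 3; ++A)
    for (int B = -3; B <= 3; ++B) {
      assert(std::max(std::max(A, B), B) == std::max(A, B));
      assert(std::min(std::max(A, B), A) == A);
      assert(std::max(std::min(A, B), A) == A);
    }
  return 0;
}
```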
if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) { diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index d58b9c9..7e6cf79 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 42a8fdc..99f3ae0 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -433,7 +433,7 @@ bool LICM::isNotUsedInLoop(Instruction &I) { if (PN->getIncomingValue(i) == &I) if (CurLoop->contains(PN->getIncomingBlock(i))) return false; - } else if (CurLoop->contains(User->getParent())) { + } else if (CurLoop->contains(User)) { return false; } } @@ -831,7 +831,7 @@ void LICM::FindPromotableValuesInLoop( UI != UE; ++UI) { // Ignore instructions not in this loop. Instruction *Use = dyn_cast<Instruction>(*UI); - if (!Use || !CurLoop->contains(Use->getParent())) + if (!Use || !CurLoop->contains(Use)) continue; if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) { diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 8b6a233..1d9dd68 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -288,7 +288,7 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) { // isUsedOutsideLoop - Returns true iff V is used outside the loop L. static bool isUsedOutsideLoop(Value *V, Loop *L) { for(Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (!L->contains(cast<Instruction>(*UI)->getParent())) + if (!L->contains(cast<Instruction>(*UI))) return true; return false; } @@ -842,7 +842,7 @@ void LoopIndexSplit::updatePHINodes(BasicBlock *ExitBB, BasicBlock *Latch, for (Value::use_iterator UI = PHV->use_begin(), E = PHV->use_end(); UI != E; ++UI) if (PHINode *U = dyn_cast<PHINode>(*UI)) - if (LP->contains(U->getParent())) { + if (LP->contains(U)) { NewV = U; break; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 85cc712..85f7368 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -144,7 +144,7 @@ namespace { /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a /// single stride of IV. All of the users may have different starting /// values, and this may not be the only stride. 
- void StrengthReduceIVUsersOfStride(const SCEV *const &Stride, + void StrengthReduceIVUsersOfStride(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L); void StrengthReduceIVUsers(Loop *L); @@ -157,14 +157,14 @@ namespace { bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, const SCEV* &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, + const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, IVExpr&, const Type*, const std::vector<BasedUser>& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector<BasedUser>& UsersToProcess); bool ValidOffset(bool, int64_t, int64_t, const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *const &Stride, + const SCEV *CollectIVUsers(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -212,8 +212,6 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - if (DeadInsts.empty()) return; - while (!DeadInsts.empty()) { Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); @@ -232,44 +230,6 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { } } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop -/// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { - // This is very common, put it first. - if (isa<SCEVConstant>(S)) - return false; - if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) { - for (unsigned int i=0; i< AE->getNumOperands(); i++) - if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) - return true; - return false; - } - if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) { - if (const Loop *newLoop = AE->getLoop()) { - if (newLoop == L) - return false; - // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L->getHeader())) - return false; - } - return true; - } - if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll - // need this when it is. - if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#endif - if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S)) - return containsAddRecFromDifferentLoop(CE->getOperand(), L); - return false; -} - /// isAddressUse - Returns true if the specified instruction is using the /// specified value as an address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { @@ -362,13 +322,13 @@ namespace { // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. 
- void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, + void RewriteInstructionToUseNewBase(const SCEV *NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, SmallVectorImpl<WeakVH> &DeadInsts, ScalarEvolution *SE); - Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, + Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, @@ -378,12 +338,12 @@ namespace { } void BasedUser::dump() const { - errs() << " Base=" << *Base; - errs() << " Imm=" << *Imm; - errs() << " Inst: " << *Inst; + dbgs() << " Base=" << *Base; + dbgs() << " Imm=" << *Imm; + dbgs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, @@ -407,7 +367,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, // value of NewBase in the case that it's a diffferent instruction from // the PHI that NewBase is computed from, or null otherwise. // -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, SmallVectorImpl<WeakVH> &DeadInsts, @@ -428,7 +388,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // If this is a use outside the loop (which means after, since it is based // on a loop indvar) we use the post-incremented value, so that we don't // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) { + if (!isUseOfPostIncrementedValue && L->contains(Inst)) { if (NewBasePt && isa<PHINode>(OperandValToReplace)) { InsertPt = NewBasePt; ++InsertPt; @@ -444,9 +404,9 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DEBUG(errs() << " Replacing with "); - DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false)); - DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + DEBUG(dbgs() << " Replacing with "); + DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); + DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"); return; } @@ -469,7 +429,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // that case(?). Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc->getParent())) { + if (L->contains(OldLoc)) { // If this is a critical edge, split the edge so that we do not insert // the code on all predecessor/successor paths. We do this unless this // is the canonical backedge for this loop, as this can make some @@ -486,7 +446,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, // is outside of the loop, and PredTI is in the loop, we want to // move the block to be immediately before the PHI block, not // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN->getParent())) + if (L->contains(PHIPred) && !L->contains(PN)) NewBB->moveBefore(PN->getParent()); // Splitting the edge can reduce the number of PHI entries we have. 
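(A note on the signature cleanup threaded through this file: `const SCEV *const &` becomes plain `const SCEV *`. Passing a pointer by const reference protects only the pointer itself, never the pointee, and adds an indirection; copying the pointer is as cheap and reads better. A tiny illustration:)

```cpp
struct SCEV {};  // stand-in for llvm::SCEV

// Both signatures prevent reseating the caller's pointer, and neither
// protects the pointee any further; the reference only adds indirection.
int useByRef(const SCEV *const &S) { return S != nullptr; }
int useByVal(const SCEV *S)        { return S != nullptr; }

int main() {
  SCEV X;
  const SCEV *P = &X;
  return useByRef(P) == useByVal(P) ? 0 : 1;
}
```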
@@ -498,15 +458,15 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Value *&Code = InsertedCode[PHIPred]; if (!Code) { // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? + Instruction *InsertPt = (L->contains(OldLoc)) ? PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), Rewriter, InsertPt, SE); - DEBUG(errs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); - DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + DEBUG(dbgs() << " Changing PHI use to "); + DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); + DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"); } @@ -523,7 +483,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, /// fitsInAddressMode - Return true if V can be subsumed within an addressing /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, const TargetLowering *TLI, bool HasBaseReg) { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { int64_t VC = SC->getValue()->getSExtValue(); @@ -737,7 +697,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // it is clearly shared across all the IV's. If the use is outside the loop // (which means after it) we don't want to factor anything *into* the loop, // so just use 0 as the base. - if (L->contains(Uses[0].Inst->getParent())) + if (L->contains(Uses[0].Inst)) std::swap(Result, Uses[0].Base); return Result; } @@ -762,7 +722,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // after the loop to affect base computation of values *inside* the loop, // because we can always add their offsets to the result IV after the loop // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst->getParent())) + if (!L->contains(Uses[i].Inst)) continue; NumUsesInsideLoop++; @@ -818,7 +778,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // and a Result in the same instruction (for example because it would // require too many registers). Check this. for (unsigned i=0; i<NumUses; ++i) { - if (!L->contains(Uses[i].Inst->getParent())) + if (!L->contains(Uses[i].Inst)) continue; // We know this is an addressing mode use; if there are any uses that // are not, FreeResult would be Zero. @@ -854,7 +814,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // the final IV value coming into those uses does. Instead of trying to // remove the pieces of the common base, which might not be there, // subtract off the base to compensate for this. 
- if (!L->contains(Uses[i].Inst->getParent())) { + if (!L->contains(Uses[i].Inst)) { Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); continue; } @@ -975,7 +935,7 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV *const &Stride, + const SCEV *Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { @@ -1088,7 +1048,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. -static bool isNonConstantNegative(const SCEV *const &Expr) { +static bool isNonConstantNegative(const SCEV *Expr) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); if (!Mul) return false; @@ -1105,7 +1065,7 @@ static bool isNonConstantNegative(const SCEV *const &Expr) { /// base of the strided accesses, as well as the old information from Uses. We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, +const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -1149,7 +1109,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, // If the user is not in the current loop, this means it is using the exit // value of the IV. Do not put anything in the base, make sure it's all in // the immediate field to allow as much factoring as possible. - if (!L->contains(UsersToProcess[i].Inst->getParent())) { + if (!L->contains(UsersToProcess[i].Inst)) { UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, UsersToProcess[i].Base); UsersToProcess[i].Base = @@ -1361,7 +1321,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFully( const SCEV *CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter) { - DEBUG(errs() << " Fully reducing all users\n"); + DEBUG(dbgs() << " Fully reducing all users\n"); // Rewrite the UsersToProcess records, creating a separate PHI for each // unique Base value. 
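(For readers new to this pass, a sketch of what loop strength reduction buys, written in plain C++ rather than LLVM IR: a per-iteration multiply on the induction variable is replaced by a second, "reduced" induction variable bumped by the stride each trip.)

```cpp
#include <cassert>

// Strength reduction by hand: the "before" loop recomputes I * Stride
// every iteration; the "after" loop carries a reduced IV instead.
int sumBefore(const int *Base, int N, int Stride) {
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += Base[I * Stride];        // multiply inside the loop
  return Sum;
}

int sumAfter(const int *Base, int N, int Stride) {
  int Sum = 0;
  const int *P = Base;              // reduced induction variable
  for (int I = 0; I < N; ++I, P += Stride)
    Sum += *P;                      // no multiply
  return Sum;
}

int main() {
  int Data[12] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  assert(sumBefore(Data, 4, 3) == sumAfter(Data, 4, 3));
  return 0;
}
```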
@@ -1393,7 +1353,7 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess, const Loop *L) { if (UsersToProcess.size() == 1 && UsersToProcess[0].isUseOfPostIncrementedValue && - L->contains(UsersToProcess[0].Inst->getParent())) + L->contains(UsersToProcess[0].Inst)) return UsersToProcess[0].Inst; return L->getLoopLatch()->getTerminator(); } @@ -1410,7 +1370,7 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter) { - DEBUG(errs() << " Inserting new PHI:\n"); + DEBUG(dbgs() << " Inserting new PHI:\n"); PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV), Stride, IVIncInsertPt, L, @@ -1423,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Phi = Phi; - DEBUG(errs() << " IV="); - DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false)); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << " IV="); + DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false)); + DEBUG(dbgs() << "\n"); } /// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to @@ -1438,7 +1398,7 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride( Value *CommonBaseV, const IVExpr &ReuseIV, Instruction *PreInsertPt) { - DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE " + DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n"); // All the users will share the reused IV. @@ -1482,7 +1442,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. void -LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, +LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride, IVUsersOfOneStride &Uses, Loop *L) { // If all the users are moved to another stride, then there is nothing to do. @@ -1547,7 +1507,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, UsersToProcess, TLI); if (DoSink) { - DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n"); + DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n"); for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); CommonExprs = NewCommon; @@ -1559,7 +1519,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, // Now that we know what we need to do, insert the PHI node itself. // - DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " + DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " << *Stride << ":\n" << " Common base: " << *CommonExprs << "\n"); @@ -1623,10 +1583,10 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, if (!Base->isZero()) { BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":"); + DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":"); if (BaseV->hasName()) - DEBUG(errs() << " Result value name = %" << BaseV->getName()); - DEBUG(errs() << "\n"); + DEBUG(dbgs() << " Result value name = %" << BaseV->getName()); + DEBUG(dbgs() << "\n"); // If BaseV is a non-zero constant, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. 
We @@ -1647,15 +1607,15 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, // FIXME: Use emitted users to emit other users. BasedUser &User = UsersToProcess.back(); - DEBUG(errs() << " Examining "); + DEBUG(dbgs() << " Examining "); if (User.isUseOfPostIncrementedValue) - DEBUG(errs() << "postinc"); + DEBUG(dbgs() << "postinc"); else - DEBUG(errs() << "preinc"); - DEBUG(errs() << " use "); - DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace, + DEBUG(dbgs() << "preinc"); + DEBUG(dbgs() << " use "); + DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace, /*PrintType=*/false)); - DEBUG(errs() << " in Inst: " << *User.Inst); + DEBUG(dbgs() << " in Inst: " << *User.Inst); // If this instruction wants to use the post-incremented value, move it // after the post-inc and use its value instead of the PHI. @@ -1666,7 +1626,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, // loop to ensure it is dominated by the increment. In case it's the // only use of the iv, the increment instruction is already before the // use. - if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt) + if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) User.Inst->moveBefore(IVIncInsertPt); } @@ -1728,7 +1688,7 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, // common base, and are adding it back here. Use the same expression // as before, rather than CommonBaseV, so DAGCombiner will zap it. if (!CommonExprs->isZero()) { - if (L->contains(User.Inst->getParent())) + if (L->contains(User.Inst)) RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(CommonBaseV)); else @@ -1815,7 +1775,7 @@ namespace { const ScalarEvolution *SE; explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) { + bool operator()(const SCEV *LHS, const SCEV *RHS) { const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); if (LHSC && RHSC) { @@ -2069,8 +2029,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, L->getHeader()->getName() + ".termcond"); - DEBUG(errs() << " Change compare stride in Inst " << *OldCond); - DEBUG(errs() << " to " << *Cond << '\n'); + DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond); + DEBUG(dbgs() << " to " << *Cond << '\n'); // Remove the old compare instruction. The old indvar is probably dead too. DeadInsts.push_back(CondUse->getOperandValToReplace()); @@ -2403,7 +2363,7 @@ static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, ScalarEvolution *SE, Loop *L, const TargetLowering *TLI = 0) { - if (!L->contains(Cond->getParent())) + if (!L->contains(Cond)) return false; if (!isa<SCEVConstant>(CondUse->getOffset())) @@ -2529,7 +2489,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { if (!UsePostInc) continue; - DEBUG(errs() << " Change loop exiting icmp to use postinc iv: " + DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " << *Cond << '\n'); // It's possible for the setcc instruction to be anywhere in the loop, and @@ -2608,9 +2568,9 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, } // Replace the increment with a decrement. 
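(The OptimizeLoopCountIVOfStride hunk resumes below. The rewrite it performs turns a counting-up exit test into a count-down-to-zero test — comparing against the constant zero typically needs no extra register for the loop bound. In source terms, as a sketch only:)

```cpp
#include <cassert>

// Count-up vs. count-down: same trip count, but the rewritten loop's
// exit test compares against the constant zero instead of N.
int tripsUp(int N) {
  int Trips = 0;
  for (int I = 0; I != N; ++I) ++Trips;   // before
  return Trips;
}

int tripsDown(int N) {
  int Trips = 0;
  for (int I = N; I != 0; --I) ++Trips;   // after
  return Trips;
}

int main() {
  assert(tripsUp(7) == tripsDown(7));
  return 0;
}
```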
- DEBUG(errs() << "LSR: Examining use "); - DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false)); - DEBUG(errs() << " in Inst: " << *Cond << '\n'); + DEBUG(dbgs() << "LSR: Examining use "); + DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false)); + DEBUG(dbgs() << " in Inst: " << *Cond << '\n'); BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub, Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr); Incr->replaceAllUsesWith(Decr); @@ -2624,7 +2584,7 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0; Value *StartVal = PHIExpr->getIncomingValue(InBlock); Value *EndVal = Cond->getOperand(1); - DEBUG(errs() << " Optimize loop counting iv to count down [" + DEBUG(dbgs() << " Optimize loop counting iv to count down [" << *EndVal << " .. " << *StartVal << "]\n"); // FIXME: check for case where both are constant. @@ -2633,7 +2593,7 @@ bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, EndVal, StartVal, "tmp", PreInsertPt); PHIExpr->setIncomingValue(InBlock, NewStartVal); Cond->setOperand(1, Zero); - DEBUG(errs() << " New icmp: " << *Cond << "\n"); + DEBUG(dbgs() << " New icmp: " << *Cond << "\n"); int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); const SCEV *NewStride = 0; @@ -2716,9 +2676,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { return false; if (!IU->IVUsesByStride.empty()) { - DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() + DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() << "\" "; - L->dump()); + L->print(dbgs())); // Sort the StrideOrder so we process larger strides first. std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), @@ -2758,8 +2718,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IVsByStride.clear(); // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + DeleteTriviallyDeadInstructions(); } // At this point, it is worth checking to see if any recurrence PHIs are also diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index b7adfdc..0c19133 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -877,7 +877,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast<Instruction>(Users[i])) { - if (!L->contains(U->getParent())) + if (!L->contains(U)) continue; U->replaceUsesOfWith(LIC, Replacement); Worklist.push_back(U); @@ -888,7 +888,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // can. This case occurs when we unswitch switch statements. for (unsigned i = 0, e = Users.size(); i != e; ++i) if (Instruction *U = cast<Instruction>(Users[i])) { - if (!L->contains(U->getParent())) + if (!L->contains(U)) continue; Worklist.push_back(U); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 8466918..827b47d 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This pass reassociates commutative expressions in an order that is designed -// to promote better constant propagation, GCSE, LICM, PRE... +// to promote better constant propagation, GCSE, LICM, PRE, etc. 
// // For example: 4 + (x + 5) -> x + (4 + 5) // @@ -35,8 +35,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" #include <algorithm> -#include <map> using namespace llvm; STATISTIC(NumLinear , "Number of insts linearized"); @@ -58,21 +58,22 @@ namespace { #ifndef NDEBUG /// PrintOps - Print out the expression identified in the Ops list. /// -static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) { +static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { Module *M = I->getParent()->getParent()->getParent(); errs() << Instruction::getOpcodeName(I->getOpcode()) << " " - << *Ops[0].Op->getType(); + << *Ops[0].Op->getType() << '\t'; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - WriteAsOperand(errs() << " ", Ops[i].Op, false, M); - errs() << "," << Ops[i].Rank; + errs() << "[ "; + WriteAsOperand(errs(), Ops[i].Op, false, M); + errs() << ", #" << Ops[i].Rank << "] "; } } #endif namespace { class Reassociate : public FunctionPass { - std::map<BasicBlock*, unsigned> RankMap; - std::map<AssertingVH<>, unsigned> ValueRankMap; + DenseMap<BasicBlock*, unsigned> RankMap; + DenseMap<AssertingVH<>, unsigned> ValueRankMap; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -86,11 +87,13 @@ namespace { private: void BuildRankMap(Function &F); unsigned getRank(Value *V); - void ReassociateExpression(BinaryOperator *I); - void RewriteExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops, + Value *ReassociateExpression(BinaryOperator *I); + void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops, unsigned Idx = 0); - Value *OptimizeExpression(BinaryOperator *I, std::vector<ValueEntry> &Ops); - void LinearizeExprTree(BinaryOperator *I, std::vector<ValueEntry> &Ops); + Value *OptimizeExpression(BinaryOperator *I, + SmallVectorImpl<ValueEntry> &Ops); + Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); void ReassociateBB(BasicBlock *BB); @@ -107,10 +110,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !isa<CmpInst>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); + + ValueRankMap.erase(Op); + Op->eraseFromParent(); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -156,13 +162,14 @@ void Reassociate::BuildRankMap(Function &F) { } unsigned Reassociate::getRank(Value *V) { - if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument... - Instruction *I = dyn_cast<Instruction>(V); - if (I == 0) return 0; // Otherwise it's a global or constant, rank 0. + if (I == 0) { + if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. + return 0; // Otherwise it's a global or constant, rank 0. + } - unsigned &CachedRank = ValueRankMap[I]; - if (CachedRank) return CachedRank; // Rank already known? + if (unsigned Rank = ValueRankMap[I]) + return Rank; // Rank already known? // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that // we can reassociate expressions for code motion! 
Since we do not recurse @@ -182,7 +189,7 @@ unsigned Reassociate::getRank(Value *V) { //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = " // << Rank << "\n"); - return CachedRank = Rank; + return ValueRankMap[I] = Rank; } /// isReassociableOp - Return true if V is an instruction of the specified @@ -197,7 +204,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static Instruction *LowerNegateToMultiply(Instruction *Neg, - std::map<AssertingVH<>, unsigned> &ValueRankMap) { + DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); @@ -250,7 +257,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) { /// caller MUST use something like RewriteExprTree to put the values back in. /// void Reassociate::LinearizeExprTree(BinaryOperator *I, - std::vector<ValueEntry> &Ops) { + SmallVectorImpl<ValueEntry> &Ops) { Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); unsigned Opcode = I->getOpcode(); @@ -282,15 +289,15 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, I->setOperand(0, UndefValue::get(I->getType())); I->setOperand(1, UndefValue::get(I->getType())); return; - } else { - // Turn X+(Y+Z) -> (Y+Z)+X - std::swap(LHSBO, RHSBO); - std::swap(LHS, RHS); - bool Success = !I->swapOperands(); - assert(Success && "swapOperands failed"); - Success = false; - MadeChange = true; } + + // Turn X+(Y+Z) -> (Y+Z)+X + std::swap(LHSBO, RHSBO); + std::swap(LHS, RHS); + bool Success = !I->swapOperands(); + assert(Success && "swapOperands failed"); + Success = false; + MadeChange = true; } else if (RHSBO) { // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the the RHS is not // part of the expression tree. @@ -322,7 +329,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I, // linearized and optimized, emit them in-order. This function is written to be // tail recursive. void Reassociate::RewriteExprTree(BinaryOperator *I, - std::vector<ValueEntry> &Ops, + SmallVectorImpl<ValueEntry> &Ops, unsigned i) { if (i+2 == Ops.size()) { if (I->getOperand(0) != Ops[i].Op || @@ -369,6 +376,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // that should be processed next by the reassociation pass. // static Value *NegateValue(Value *V, Instruction *BI) { + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantExpr::getNeg(C); + // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -376,7 +386,7 @@ static Value *NegateValue(Value *V, Instruction *BI) { // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate // the constants. We assume that instcombine will clean up the mess later if - // we introduce tons of unnecessary negation instructions... + // we introduce tons of unnecessary negation instructions. // if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { @@ -393,10 +403,36 @@ static Value *NegateValue(Value *V, Instruction *BI) { I->setName(I->getName()+".neg"); return I; } + + // Okay, we need to materialize a negated version of V with an instruction. + // Scan the use lists of V to see if we have one already. 
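(The NegateValue hunk resumes below with the new use-list scan. The rewrite it enables — pushing a negation through an add chain, as in X = -(A+12+C+D) becoming X = -A + -12 + -C + -D — rests on negation distributing over addition in two's-complement arithmetic. A quick numeric check, using unsigned types so the wraparound is well defined:)

```cpp
#include <cstdint>
#include <cstdio>

// -(A + B + C) == (-A) + (-B) + (-C) under modular (wraparound) arithmetic.
int main() {
  uint32_t A = 0x12345678u, B = 0xDEADBEEFu, C = 12u;
  uint32_t Lhs = 0u - (A + B + C);
  uint32_t Rhs = (0u - A) + (0u - B) + (0u - C);
  std::printf("%s\n", Lhs == Rhs ? "equal" : "different");
  return Lhs == Rhs ? 0 : 1;
}
```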
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + if (!BinaryOperator::isNeg(*UI)) continue; + + // We found one! Now we have to make sure that the definition dominates + // this use. We do this by moving it to the entry block (if it is a + // non-instruction value) or right after the definition. These negates will + // be zapped by reassociate later, so we don't need much finesse here. + BinaryOperator *TheNeg = cast<BinaryOperator>(*UI); + + BasicBlock::iterator InsertPt; + if (Instruction *InstInput = dyn_cast<Instruction>(V)) { + if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { + InsertPt = II->getNormalDest()->begin(); + } else { + InsertPt = InstInput; + ++InsertPt; + } + while (isa<PHINode>(InsertPt)) ++InsertPt; + } else { + InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); + } + TheNeg->moveBefore(InsertPt); + return TheNeg; + } // Insert a 'neg' instruction that subtracts the value from zero to get the // negation. - // return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI); } @@ -427,12 +463,12 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) { /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. static Instruction *BreakUpSubtract(Instruction *Sub, - std::map<AssertingVH<>, unsigned> &ValueRankMap) { - // Convert a subtract into an add and a neg instruction... so that sub - // instructions can be commuted with other add instructions... + DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { + // Convert a subtract into an add and a neg instruction. This allows sub + // instructions to be commuted with other add instructions. // - // Calculate the negative value of Operand 1 of the sub instruction... - // and set it as the RHS of the add instruction we just made... + // Calculate the negative value of Operand 1 of the sub instruction, + // and set it as the RHS of the add instruction we just made. // Value *NegVal = NegateValue(Sub->getOperand(1), Sub); Instruction *New = @@ -452,7 +488,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, /// by one, change this into a multiply by a constant to assist with further /// reassociation. static Instruction *ConvertShiftToMul(Instruction *Shl, - std::map<AssertingVH<>, unsigned> &ValueRankMap) { + DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { // If an operand of this shift is a reassociable multiply, or if the shift // is used by a reassociable multiply or add, turn into a multiply. if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || @@ -460,11 +496,10 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, (isReassociableOp(Shl->use_back(), Instruction::Mul) || isReassociableOp(Shl->use_back(), Instruction::Add)))) { Constant *MulCst = ConstantInt::get(Shl->getType(), 1); - MulCst = - ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); + MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1))); - Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, - "", Shl); + Instruction *Mul = + BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); @@ -475,15 +510,16 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, } // Scan backwards and forwards among values with the same rank as element i to -// see if X exists. If X does not exist, return i. -static unsigned FindInOperandList(std::vector<ValueEntry> &Ops, unsigned i, +// see if X exists. If X does not exist, return i. 
This is useful when +// scanning for 'x' when we see '-x' because they both get the same rank. +static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i, Value *X) { unsigned XRank = Ops[i].Rank; unsigned e = Ops.size(); for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) if (Ops[j].Op == X) return j; - // Scan backwards + // Scan backwards. for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) if (Ops[j].Op == X) return j; @@ -492,7 +528,7 @@ static unsigned FindInOperandList(std::vector<ValueEntry> &Ops, unsigned i, /// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. -static Value *EmitAddTreeOfValues(Instruction *I, std::vector<Value*> &Ops) { +static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){ if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -508,32 +544,57 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); if (!BO) return 0; - std::vector<ValueEntry> Factors; + SmallVector<ValueEntry, 8> Factors; LinearizeExprTree(BO, Factors); bool FoundFactor = false; - for (unsigned i = 0, e = Factors.size(); i != e; ++i) + bool NeedsNegate = false; + for (unsigned i = 0, e = Factors.size(); i != e; ++i) { if (Factors[i].Op == Factor) { FoundFactor = true; Factors.erase(Factors.begin()+i); break; } + + // If this is a negative version of this factor, remove it. + if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) + if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op)) + if (FC1->getValue() == -FC2->getValue()) { + FoundFactor = NeedsNegate = true; + Factors.erase(Factors.begin()+i); + break; + } + } + if (!FoundFactor) { // Make sure to restore the operands to the expression tree. RewriteExprTree(BO, Factors); return 0; } - if (Factors.size() == 1) return Factors[0].Op; + BasicBlock::iterator InsertPt = BO; ++InsertPt; + + // If this was just a single multiply, remove the multiply and return the only + // remaining operand. + if (Factors.size() == 1) { + ValueRankMap.erase(BO); + BO->eraseFromParent(); + V = Factors[0].Op; + } else { + RewriteExprTree(BO, Factors); + V = BO; + } - RewriteExprTree(BO, Factors); - return BO; + if (NeedsNegate) + V = BinaryOperator::CreateNeg(V, "neg", InsertPt); + + return V; } /// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively /// add its operands as factors, otherwise add V to the list of factors. static void FindSingleUseMultiplyFactors(Value *V, - std::vector<Value*> &Factors) { + SmallVectorImpl<Value*> &Factors) { BinaryOperator *BO; if ((!V->hasOneUse() && !V->use_empty()) || !(BO = dyn_cast<BinaryOperator>(V)) || @@ -547,10 +608,228 @@ static void FindSingleUseMultiplyFactors(Value *V, FindSingleUseMultiplyFactors(BO->getOperand(0), Factors); } +/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' +/// instruction. This optimizes based on identities. If it can be reduced to +/// a single Value, it is returned, otherwise the Ops list is mutated as +/// necessary. +static Value *OptimizeAndOrXor(unsigned Opcode, + SmallVectorImpl<ValueEntry> &Ops) { + // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. + // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + // First, check for X and ~X in the operand list. 
+ assert(i < Ops.size()); + if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^. + Value *X = BinaryOperator::getNotArgument(Ops[i].Op); + unsigned FoundX = FindInOperandList(Ops, i, X); + if (FoundX != i) { + if (Opcode == Instruction::And) // ...&X&~X = 0 + return Constant::getNullValue(X->getType()); + + if (Opcode == Instruction::Or) // ...|X|~X = -1 + return Constant::getAllOnesValue(X->getType()); + } + } + + // Next, check for duplicate pairs of values, which we assume are next to + // each other, due to our sorting criteria. + assert(i < Ops.size()); + if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { + if (Opcode == Instruction::And || Opcode == Instruction::Or) { + // Drop duplicate values for And and Or. + Ops.erase(Ops.begin()+i); + --i; --e; + ++NumAnnihil; + continue; + } + + // Drop pairs of values for Xor. + assert(Opcode == Instruction::Xor); + if (e == 2) + return Constant::getNullValue(Ops[0].Op->getType()); + + // Y ^ X^X -> Y + Ops.erase(Ops.begin()+i, Ops.begin()+i+2); + i -= 1; e -= 2; + ++NumAnnihil; + } + } + return 0; +} +/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This +/// optimizes based on identities. If it can be reduced to a single Value, it +/// is returned, otherwise the Ops list is mutated as necessary. +Value *Reassociate::OptimizeAdd(Instruction *I, + SmallVectorImpl<ValueEntry> &Ops) { + // Scan the operand lists looking for X and -X pairs. If we find any, we + // can simplify the expression. X+-X == 0. While we're at it, scan for any + // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. + // + // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1". + // + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + Value *TheOp = Ops[i].Op; + // Check to see if we've seen this operand before. If so, we factor all + // instances of the operand together. Due to our sorting criteria, we know + // that these need to be next to each other in the vector. + if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) { + // Rescan the list, remove all instances of this operand from the expr. + unsigned NumFound = 0; + do { + Ops.erase(Ops.begin()+i); + ++NumFound; + } while (i != Ops.size() && Ops[i].Op == TheOp); + + DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n'); + ++NumFactor; + + // Insert a new multiply. + Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound); + Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); + + // Now that we have inserted a multiply, optimize it. This allows us to + // handle cases that require multiple factoring steps, such as this: + // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 + Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + + // If every add operand was a duplicate, return the multiply. + if (Ops.empty()) + return Mul; + + // Otherwise, we had some input that didn't have the dupe, such as + // "A + A + B" -> "A*2 + B". Add the new multiply to the list of + // things being added by this operation. + Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); + + --i; + e = Ops.size(); + continue; + } + + // Check for X and -X in the operand list. + if (!BinaryOperator::isNeg(TheOp)) + continue; + + Value *X = BinaryOperator::getNegArgument(TheOp); + unsigned FoundX = FindInOperandList(Ops, i, X); + if (FoundX == i) + continue; + + // Remove X and -X from the operand list. 
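The identities OptimizeAndOrXor leans on (X & ~X == 0, X | ~X == -1, X & X == X, X | X == X, X ^ X == 0) hold for any operand list; the pass only has to look at equal-rank neighbors because the list is already sorted. A standalone sketch of the complement-pair scan, with a toy Op type in place of IR values (the O(n^2) loop is for clarity; the real code scans within a rank band via FindInOperandList):

#include <cstdio>
#include <vector>

struct Op { int Id; bool Inverted; };   // Inverted == true models "~X"

// If the operand list holds both X and ~X, an 'and' of everything is 0
// and an 'or' of everything is all-ones; no other operand matters.
static bool hasComplementPair(const std::vector<Op> &Ops) {
  for (size_t i = 0; i != Ops.size(); ++i)
    for (size_t j = i + 1; j != Ops.size(); ++j)
      if (Ops[i].Id == Ops[j].Id && Ops[i].Inverted != Ops[j].Inverted)
        return true;
  return false;
}

int main() {
  // Operand list of (a & b & ~a): annihilates to 0 (as an 'or', to -1).
  Op Ops_[] = { {1, false}, {2, false}, {1, true} };
  std::vector<Op> Ops(Ops_, Ops_ + 3);

  std::printf(hasComplementPair(Ops) ? "and -> 0, or -> -1\n"
                                     : "no simplification\n");
  return 0;
}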
+ if (Ops.size() == 2) + return Constant::getNullValue(X->getType()); + + Ops.erase(Ops.begin()+i); + if (i < FoundX) + --FoundX; + else + --i; // Need to back up an extra one. + Ops.erase(Ops.begin()+FoundX); + ++NumAnnihil; + --i; // Revisit element. + e -= 2; // Removed two elements. + } + + // Scan the operand list, checking to see if there are any common factors + // between operands. Consider something like A*A+A*B*C+D. We would like to + // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. + // To efficiently find this, we count the number of times a factor occurs + // for any ADD operands that are MULs. + DenseMap<Value*, unsigned> FactorOccurrences; + + // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) + // where they are actually the same multiply. + unsigned MaxOcc = 0; + Value *MaxOccVal = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op); + if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) + continue; + + // Compute all of the factors of this added value. + SmallVector<Value*, 8> Factors; + FindSingleUseMultiplyFactors(BOp, Factors); + assert(Factors.size() > 1 && "Bad linearize!"); + + // Add one to FactorOccurrences for each unique factor in this op. + SmallPtrSet<Value*, 8> Duplicates; + for (unsigned i = 0, e = Factors.size(); i != e; ++i) { + Value *Factor = Factors[i]; + if (!Duplicates.insert(Factor)) continue; + + unsigned Occ = ++FactorOccurrences[Factor]; + if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } + + // If Factor is a negative constant, add the negated value as a factor + // because we can percolate the negate out. Watch for minint, which + // cannot be positivified. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) + if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { + Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); + assert(!Duplicates.count(Factor) && + "Shouldn't have two constant factors, missed a canonicalize"); + + unsigned Occ = ++FactorOccurrences[Factor]; + if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } + } + } + } + + // If any factor occurred more than one time, we can pull it out. + if (MaxOcc > 1) { + DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n'); + ++NumFactor; + + // Create a new instruction that uses the MaxOccVal twice. If we don't do + // this, we could otherwise run into situations where removing a factor + // from an expression will drop a use of maxocc, and this can cause + // RemoveFactorFromExpression on successive values to behave differently. + Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); + SmallVector<Value*, 4> NewMulOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { + NewMulOps.push_back(V); + Ops.erase(Ops.begin()+i); + --i; --e; + } + } + + // No need for extra uses anymore. + delete DummyInst; + + unsigned NumAddedValues = NewMulOps.size(); + Value *V = EmitAddTreeOfValues(I, NewMulOps); + + // Now that we have inserted the add tree, optimize it. This allows us to + // handle cases that require multiple factoring steps, such as this: + // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) + assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); + V = ReassociateExpression(cast<BinaryOperator>(V)); + + // Create the multiply. 
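The factoring step that follows boils down to a counting problem: for every added term that is a multiply, count each unique factor once, then pull out the factor with the highest count if it appears in more than one term. A self-contained sketch with strings as factor names (the pass uses a DenseMap over Value* and a SmallPtrSet for the per-term Duplicates set):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // A*A + A*B*C + D, written as the factor list of each multiply term
  // (D is not a multiply, so it contributes nothing here).
  std::vector<std::vector<std::string> > Terms(2);
  Terms[0].push_back("A"); Terms[0].push_back("A");
  Terms[1].push_back("A"); Terms[1].push_back("B"); Terms[1].push_back("C");

  // Count each unique factor once per term, so A*A does not count A twice.
  std::map<std::string, unsigned> FactorOccurrences;
  std::string MaxOccVal;
  unsigned MaxOcc = 0;
  for (size_t t = 0; t != Terms.size(); ++t) {
    std::set<std::string> Duplicates;
    for (size_t f = 0; f != Terms[t].size(); ++f) {
      if (!Duplicates.insert(Terms[t][f]).second)
        continue;
      unsigned Occ = ++FactorOccurrences[Terms[t][f]];
      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Terms[t][f]; }
    }
  }

  if (MaxOcc > 1)   // worth factoring: A*A + A*B*C -> A*(A + B*C)
    std::printf("pull '%s' out of %u terms\n", MaxOccVal.c_str(), MaxOcc);
  return 0;
}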
+ Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); + + // Rerun associate on the multiply in case the inner expression turned into + // a multiply. We want to make sure that we keep things in canonical form. + V2 = ReassociateExpression(cast<BinaryOperator>(V2)); + + // If every add operand included the factor (e.g. "A*B + A*C"), then the + // entire result expression is just the multiply "A*(B+C)". + if (Ops.empty()) + return V2; + + // Otherwise, we had some input that didn't have the factor, such as + // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of + // things being added by this operation. + Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); + } + + return 0; +} Value *Reassociate::OptimizeExpression(BinaryOperator *I, - std::vector<ValueEntry> &Ops) { + SmallVectorImpl<ValueEntry> &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. bool IterateOptimization = false; @@ -570,198 +849,53 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, switch (Opcode) { default: break; case Instruction::And: - if (CstVal->isZero()) { // ... & 0 -> 0 - ++NumAnnihil; + if (CstVal->isZero()) // X & 0 -> 0 return CstVal; - } else if (CstVal->isAllOnesValue()) { // ... & -1 -> ... + if (CstVal->isAllOnesValue()) // X & -1 -> X Ops.pop_back(); - } break; case Instruction::Mul: - if (CstVal->isZero()) { // ... * 0 -> 0 + if (CstVal->isZero()) { // X * 0 -> 0 ++NumAnnihil; return CstVal; - } else if (cast<ConstantInt>(CstVal)->isOne()) { - Ops.pop_back(); // ... * 1 -> ... } + + if (cast<ConstantInt>(CstVal)->isOne()) + Ops.pop_back(); // X * 1 -> X break; case Instruction::Or: - if (CstVal->isAllOnesValue()) { // ... | -1 -> -1 - ++NumAnnihil; + if (CstVal->isAllOnesValue()) // X | -1 -> -1 return CstVal; - } // FALLTHROUGH! case Instruction::Add: case Instruction::Xor: - if (CstVal->isZero()) // ... [|^+] 0 -> ... + if (CstVal->isZero()) // X [|^+] 0 -> X Ops.pop_back(); break; } if (Ops.size() == 1) return Ops[0].Op; - // Handle destructive annihilation do to identities between elements in the + // Handle destructive annihilation due to identities between elements in the // argument list here. switch (Opcode) { default: break; case Instruction::And: case Instruction::Or: - case Instruction::Xor: - // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. - // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - // First, check for X and ~X in the operand list. - assert(i < Ops.size()); - if (BinaryOperator::isNot(Ops[i].Op)) { // Cannot occur for ^. - Value *X = BinaryOperator::getNotArgument(Ops[i].Op); - unsigned FoundX = FindInOperandList(Ops, i, X); - if (FoundX != i) { - if (Opcode == Instruction::And) { // ...&X&~X = 0 - ++NumAnnihil; - return Constant::getNullValue(X->getType()); - } else if (Opcode == Instruction::Or) { // ...|X|~X = -1 - ++NumAnnihil; - return Constant::getAllOnesValue(X->getType()); - } - } - } - - // Next, check for duplicate pairs of values, which we assume are next to - // each other, due to our sorting criteria. - assert(i < Ops.size()); - if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { - if (Opcode == Instruction::And || Opcode == Instruction::Or) { - // Drop duplicate values. 
- Ops.erase(Ops.begin()+i); - --i; --e; - IterateOptimization = true; - ++NumAnnihil; - } else { - assert(Opcode == Instruction::Xor); - if (e == 2) { - ++NumAnnihil; - return Constant::getNullValue(Ops[0].Op->getType()); - } - // ... X^X -> ... - Ops.erase(Ops.begin()+i, Ops.begin()+i+2); - i -= 1; e -= 2; - IterateOptimization = true; - ++NumAnnihil; - } - } - } + case Instruction::Xor: { + unsigned NumOps = Ops.size(); + if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) + return Result; + IterateOptimization |= Ops.size() != NumOps; break; + } - case Instruction::Add: - // Scan the operand lists looking for X and -X pairs. If we find any, we - // can simplify the expression. X+-X == 0. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - assert(i < Ops.size()); - // Check for X and -X in the operand list. - if (BinaryOperator::isNeg(Ops[i].Op)) { - Value *X = BinaryOperator::getNegArgument(Ops[i].Op); - unsigned FoundX = FindInOperandList(Ops, i, X); - if (FoundX != i) { - // Remove X and -X from the operand list. - if (Ops.size() == 2) { - ++NumAnnihil; - return Constant::getNullValue(X->getType()); - } else { - Ops.erase(Ops.begin()+i); - if (i < FoundX) - --FoundX; - else - --i; // Need to back up an extra one. - Ops.erase(Ops.begin()+FoundX); - IterateOptimization = true; - ++NumAnnihil; - --i; // Revisit element. - e -= 2; // Removed two elements. - } - } - } - } - - - // Scan the operand list, checking to see if there are any common factors - // between operands. Consider something like A*A+A*B*C+D. We would like to - // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. - // To efficiently find this, we count the number of times a factor occurs - // for any ADD operands that are MULs. - std::map<Value*, unsigned> FactorOccurrences; - unsigned MaxOcc = 0; - Value *MaxOccVal = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op)) { - if (BOp->getOpcode() == Instruction::Mul && BOp->use_empty()) { - // Compute all of the factors of this added value. - std::vector<Value*> Factors; - FindSingleUseMultiplyFactors(BOp, Factors); - assert(Factors.size() > 1 && "Bad linearize!"); - - // Add one to FactorOccurrences for each unique factor in this op. - if (Factors.size() == 2) { - unsigned Occ = ++FactorOccurrences[Factors[0]]; - if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[0]; } - if (Factors[0] != Factors[1]) { // Don't double count A*A. - Occ = ++FactorOccurrences[Factors[1]]; - if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[1]; } - } - } else { - std::set<Value*> Duplicates; - for (unsigned i = 0, e = Factors.size(); i != e; ++i) { - if (Duplicates.insert(Factors[i]).second) { - unsigned Occ = ++FactorOccurrences[Factors[i]]; - if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factors[i]; } - } - } - } - } - } - } - - // If any factor occurred more than one time, we can pull it out. - if (MaxOcc > 1) { - DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n"); - - // Create a new instruction that uses the MaxOccVal twice. If we don't do - // this, we could otherwise run into situations where removing a factor - // from an expression will drop a use of maxocc, and this can cause - // RemoveFactorFromExpression on successive values to behave differently. 
- Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); - std::vector<Value*> NewMulOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { - NewMulOps.push_back(V); - Ops.erase(Ops.begin()+i); - --i; --e; - } - } - - // No need for extra uses anymore. - delete DummyInst; - - unsigned NumAddedValues = NewMulOps.size(); - Value *V = EmitAddTreeOfValues(I, NewMulOps); - Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I); - - // Now that we have inserted V and its sole use, optimize it. This allows - // us to handle cases that require multiple factoring steps, such as this: - // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) - if (NumAddedValues > 1) - ReassociateExpression(cast<BinaryOperator>(V)); - - ++NumFactor; - - if (Ops.empty()) - return V2; + case Instruction::Add: { + unsigned NumOps = Ops.size(); + if (Value *Result = OptimizeAdd(I, Ops)) + return Result; + IterateOptimization |= Ops.size() != NumOps; + } - // Add the new value to the list of things being added. - Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); - - // Rewrite the tree so that there is now a use of V. - RewriteExprTree(I, Ops); - return OptimizeExpression(I, Ops); - } break; //case Instruction::Mul: } @@ -826,13 +960,14 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { } } -void Reassociate::ReassociateExpression(BinaryOperator *I) { +Value *Reassociate::ReassociateExpression(BinaryOperator *I) { - // First, walk the expression tree, linearizing the tree, collecting - std::vector<ValueEntry> Ops; + // First, walk the expression tree, linearizing the tree, collecting the + // operand information. + SmallVector<ValueEntry, 8> Ops; LinearizeExprTree(I, Ops); - DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n"); + DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << '\n'); // Now that we have linearized the tree to a list and have gathered all of // the operands and their ranks, sort the operands by their rank. Use a @@ -847,10 +982,11 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { if (Value *V = OptimizeExpression(I, Ops)) { // This expression tree simplified to something that isn't a tree, // eliminate it. - DEBUG(errs() << "Reassoc to scalar: " << *V << "\n"); + DEBUG(errs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); RemoveDeadBinaryOp(I); - return; + ++NumAnnihil; + return V; } // We want to sink immediates as deeply as possible except in the case where @@ -861,22 +997,24 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add && isa<ConstantInt>(Ops.back().Op) && cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { - Ops.insert(Ops.begin(), Ops.back()); - Ops.pop_back(); + ValueEntry Tmp = Ops.pop_back_val(); + Ops.insert(Ops.begin(), Tmp); } - DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n"); + DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << '\n'); if (Ops.size() == 1) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); RemoveDeadBinaryOp(I); - } else { - // Now that we ordered and optimized the expressions, splat them back into - // the expression tree, removing any unneeded nodes. - RewriteExprTree(I, Ops); + return Ops[0].Op; } + + // Now that we ordered and optimized the expressions, splat them back into + // the expression tree, removing any unneeded nodes. 
+ RewriteExprTree(I, Ops); + return I; } @@ -888,7 +1026,7 @@ bool Reassociate::runOnFunction(Function &F) { for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) ReassociateBB(FI); - // We are done with the rank map... + // We are done with the rank map. RankMap.clear(); ValueRankMap.clear(); return MadeChange; diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index dbc82e1..f91fbda 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/SSAUpdater.h" -#include <cstdio> using namespace llvm; STATISTIC(NumSCCVNInstr, "Number of instructions deleted by SCCVN"); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index b040a27..79bb7c5 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -74,6 +74,10 @@ namespace { private: TargetData *TD; + /// DeadInsts - Keep track of instructions we have made dead, so that + /// we can remove them after we are done working. + SmallVector<Value*, 32> DeadInsts; + /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. @@ -102,25 +106,29 @@ namespace { int isSafeAllocaToScalarRepl(AllocaInst *AI); - void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info); - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info); - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, - AllocaInfo &Info); + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, + AllocaInfo &Info); + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, AllocaInfo &Info); + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy); void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); - void CleanupGEP(GetElementPtrInst *GEP); - void CleanupAllocaUsers(AllocaInst *AI); + void DeleteDeadInstructions(); + void CleanupAllocaUsers(Value *V); AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector<AllocaInst*, 32> &NewElts); - - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -360,399 +368,350 @@ void SROA::DoScalarReplacement(AllocaInst *AI, } } - // Now that we have created the alloca instructions that we want to 
use, - // expand the getelementptr instructions to use them. - while (!AI->use_empty()) { - Instruction *User = cast<Instruction>(AI->use_back()); - if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); - BCInst->eraseFromParent(); - continue; - } - - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - Value *Insert = UndefValue::get(LI->getType()); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - continue; - } - - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - Value *Val = SI->getOperand(0); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, ElementAllocas[i], SI); - } - SI->eraseFromParent(); - continue; - } - - GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User); - // We now know that the GEP is of the form: GEP <ptr>, 0, <cst> - unsigned Idx = - (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); - - assert(Idx < ElementAllocas.size() && "Index out of range?"); - AllocaInst *AllocaToUse = ElementAllocas[Idx]; - - Value *RepValue; - if (GEPI->getNumOperands() == 3) { - // Do not insert a new getelementptr instruction with zero indices, only - // to have it optimized out later. - RepValue = AllocaToUse; - } else { - // We are indexing deeply into the structure, so we still need a - // getelement ptr instruction to finish the indexing. This may be - // expanded itself once the worklist is rerun. - // - SmallVector<Value*, 8> NewArgs; - NewArgs.push_back(Constant::getNullValue( - Type::getInt32Ty(AI->getContext()))); - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), - NewArgs.end(), "", GEPI); - RepValue->takeName(GEPI); - } - - // If this GEP is to the start of the aggregate, check for memcpys. - if (Idx == 0 && GEPI->hasAllZeroIndices()) - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); - - // Move all of the users over to the new GEP. - GEPI->replaceAllUsesWith(RepValue); - // Delete the old GEP - GEPI->eraseFromParent(); - } + // Now that we have created the new alloca instructions, rewrite all the + // uses of the old alloca. + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - // Finally, delete the Alloca instruction + // Now erase any instructions that were made dead while rewriting the alloca. + DeleteDeadInstructions(); AI->eraseFromParent(); + NumReplaced++; } -/// isSafeElementUse - Check to see if this use is an allowed use for a -/// getelementptr instruction of an array aggregate allocation. isFirstElt -/// indicates whether Ptr is known to the start of the aggregate. 
-void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) { - Instruction *User = cast<Instruction>(*I); - switch (User->getOpcode()) { - case Instruction::Load: break; - case Instruction::Store: - // Store is ok if storing INTO the pointer, not storing the pointer - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); - break; - case Instruction::GetElementPtr: { - GetElementPtrInst *GEP = cast<GetElementPtrInst>(User); - bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1 && - (!isa<ConstantInt>(GEP->getOperand(1)) || - !cast<ConstantInt>(GEP->getOperand(1))->isZero())) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - // Verify that any array subscripts are in range. - for (gep_type_iterator GEPIt = gep_type_begin(GEP), - E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { - // Ignore struct elements, no extra checking needed for these. - if (isa<StructType>(*GEPIt)) - continue; - - // This GEP indexes an array. Verify that this is an in-range - // constant integer. Specifically, consider A[0][i]. We cannot know that - // the user isn't doing invalid things like allowing i to index an - // out-of-range subscript that accesses A[1]. Because of this, we have - // to reject SROA of any accesses into structs where any of the - // components are variables. - ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - - // Are all indices still zero? - AreAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) { - if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); - } +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, +/// recursively including all their operands that become trivially dead. +void SROA::DeleteDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = cast<Instruction>(DeadInsts.pop_back_val()); + + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + // Zero out the operand and see if it becomes trivially dead. + // (But, don't add allocas to the dead instruction list -- they are + // already on the worklist and will be deleted separately.) + *OI = 0; + if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) + DeadInsts.push_back(U); } - - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); - if (Info.isUnsafe) return; - break; - } - case Instruction::BitCast: - if (isFirstElt) { - isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info); - if (Info.isUnsafe) return; - break; - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - case Instruction::Call: - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { - if (isFirstElt) { - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); - if (Info.isUnsafe) return; - break; - } - } - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - default: - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - return MarkUnsafe(Info); - } - } - return; // All users look ok :) -} -/// AllUsersAreLoads - Return true if all users of this value are loads. 
-static bool AllUsersAreLoads(Value *Ptr) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) - if (cast<Instruction>(*I)->getOpcode() != Instruction::Load) - return false; - return true; -} - -/// isSafeUseOfAllocation - Check if this user is an allowed use for an -/// aggregate allocation. -void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, - AllocaInfo &Info) { - if (BitCastInst *C = dyn_cast<BitCastInst>(User)) - return isSafeUseOfBitCastedAllocation(C, AI, Info); - - if (LoadInst *LI = dyn_cast<LoadInst>(User)) - if (!LI->isVolatile()) - return;// Loads (returning a first class aggregrate) are always rewritable - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) - if (!SI->isVolatile() && SI->getOperand(0) != AI) - return;// Store is ok if storing INTO the pointer, not storing the pointer - - GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User); - if (GEPI == 0) - return MarkUnsafe(Info); - - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - - // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>". - if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { - return MarkUnsafe(Info); + I->eraseFromParent(); } +} + +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to +/// performing scalar replacement of alloca AI. The results are flagged in +/// the Info parameter. Offset indicates the position within AI that is +/// referenced by this instruction. +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast<Instruction>(*UI); - ++I; - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? - - bool IsAllZeroIndices = true; - - // If the first index is a non-constant index into an array, see if we can - // handle it as a special case. - if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { - if (!isa<ConstantInt>(I.getOperand())) { - IsAllZeroIndices = 0; - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements in - // it, in which case we CAN promote it, but we have to canonicalize this - // out if this is the only problem. 
- if ((NumElements == 1 || NumElements == 2) && - AllUsersAreLoads(GEPI)) { + if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { + isSafeForScalarRepl(BC, AI, Offset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + uint64_t GEPOffset = Offset; + isSafeGEP(GEPI, AI, GEPOffset, Info); + if (!Info.isUnsafe) + isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { + ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); + if (Length) + isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, + UI.getOperandNo() == 1, Info); + else + MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + if (!LI->isVolatile()) { + const Type *LIType = LI->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info); + } else + MarkUnsafe(Info); + } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isVolatile() && SI->getOperand(0) != I) { + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info); + } else + MarkUnsafe(Info); + } else if (isa<DbgInfoIntrinsic>(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(I)) { Info.needsCleanup = true; - return; // Canonicalization required! + return; } - return MarkUnsafe(Info); + MarkUnsafe(Info); + } else { + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + MarkUnsafe(Info); } + if (Info.isUnsafe) return; } - +} + +/// isSafeGEP - Check if a GEP instruction can be handled for scalar +/// replacement. It is safe when all the indices are constant, in-bounds +/// references, and when the resulting offset corresponds to an element within +/// the alloca type. The results are flagged in the Info parameter. Upon +/// return, Offset is adjusted as specified by the GEP indices. +void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, + uint64_t &Offset, AllocaInfo &Info) { + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); + if (GEPIt == E) + return; + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; I != E; ++I) { + for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa<StructType>(*I)) + if (isa<StructType>(*GEPIt)) continue; - - ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - // Are all indices still zero? - IsAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) { - // This GEP indexes an array. Verify that this is an in-range constant - // integer. Specifically, consider A[0][i]. We cannot know that the user - // isn't doing invalid things like allowing i to index an out-of-range - // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. 
- if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); + ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); + if (!IdxVal) + return MarkUnsafe(Info); + } + + // Compute the offset due to this GEP and check if the alloca has a + // component element at that offset. + SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info); +} + +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI +/// alloca or has an offset and size that corresponds to a component element +/// within it. The offset checked here may have been formed from a GEP with a +/// pointer bitcasted to a different type. +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, + AllocaInfo &Info) { + // Check if this is a load/store of the entire alloca. + if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); + // This is safe for MemIntrinsics (where MemOpType is 0), integer types + // (which are essentially the same as the MemIntrinsics, especially with + // regard to copying padding between elements), or references using the + // aggregate type of the alloca. + if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) { + if (!UsesAggregateType) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + } + return; } } - - // If there are any non-simple uses of this getelementptr, make sure to reject - // them. - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); + // Check if the offset/size correspond to a component within the alloca type. + const Type *T = AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize)) + return; + + return MarkUnsafe(Info); } -/// isSafeMemIntrinsicOnAllocation - Check if the specified memory -/// intrinsic can be promoted by SROA. At this point, we know that the operand -/// of the memintrinsic is a pointer to the beginning of the allocation. -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, - unsigned OpNo, AllocaInfo &Info) { - // If not constant length, give up. - ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); - if (!Length) return MarkUnsafe(Info); - - // If not the whole aggregate, give up. - if (Length->getZExtValue() != - TD->getTypeAllocSize(AI->getType()->getElementType())) - return MarkUnsafe(Info); - - // We only know about memcpy/memset/memmove. - if (!isa<MemIntrinsic>(MI)) - return MarkUnsafe(Info); - - // Otherwise, we can transform it. Determine whether this is a memcpy/set - // into or out of the aggregate. - if (OpNo == 1) - Info.isMemCpyDst = true; - else { - assert(OpNo == 2); - Info.isMemCpySrc = true; +/// TypeHasComponent - Return true if T has a component type with the +/// specified offset and size. If Size is zero, do not check the size. 
+bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { + const Type *EltTy; + uint64_t EltSize; + if (const StructType *ST = dyn_cast<StructType>(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + unsigned EltIdx = Layout->getElementContainingOffset(Offset); + EltTy = ST->getContainedType(EltIdx); + EltSize = TD->getTypeAllocSize(EltTy); + Offset -= Layout->getElementOffset(EltIdx); + } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + EltTy = AT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + if (Offset >= AT->getNumElements() * EltSize) + return false; + Offset %= EltSize; + } else { + return false; } + if (Offset == 0 && (Size == 0 || EltSize == Size)) + return true; + // Check if the component spans multiple elements. + if (Offset + Size > EltSize) + return false; + return TypeHasComponent(EltTy, Offset, Size); } -/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast -/// from an alloca are safe for SROA of that alloca. -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) { - isSafeUseOfBitCastedAllocation(BCU, AI, Info); - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) { - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { - if (SI->isVolatile()) - return MarkUnsafe(Info); - - // If storing the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and store through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa<IntegerType>(SI->getOperand(0)->getType()) && - TD->getTypeAllocSize(SI->getOperand(0)->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpyDst = true; - continue; - } - return MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) { - if (LI->isVolatile()) - return MarkUnsafe(Info); - - // If loading the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and load through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. - if (isa<IntegerType>(LI->getType()) && - TD->getTypeAllocSize(LI->getType()) == - TD->getTypeAllocSize(AI->getType()->getElementType())) { - Info.isMemCpySrc = true; - continue; +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite +/// the instruction I, which references it, to use the separate elements. +/// Offset indicates the position within AI that is referenced by this +/// instruction. 
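TypeHasComponent above is a recursive descent over the type layout: peel off the struct member or array element containing Offset, rebase Offset into it, and succeed only when the access lands exactly on a component boundary of the right size. A standalone sketch against a toy type model, where explicit sizes and member offsets stand in for TargetData and StructLayout:

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy type: a scalar of a given size, an array of NumElts elements, or a
// struct with explicit member offsets (so padding is representable).
struct Ty {
  enum { Scalar, Array, Struct } Kind;
  uint64_t Size;                   // total allocated size in bytes
  std::vector<const Ty*> Members;  // struct members, or one array elt type
  std::vector<uint64_t> Offsets;   // member offsets (structs only)
  uint64_t NumElts;                // arrays only
};

// Mirrors the shape of TypeHasComponent: peel one level per call, adjust
// Offset into the containing element, and recurse.
static bool typeHasComponent(const Ty *T, uint64_t Offset, uint64_t Size) {
  const Ty *EltTy;
  uint64_t EltSize;
  if (T->Kind == Ty::Struct) {
    // Last member starting at or before Offset (offsets sorted ascending;
    // the real code uses StructLayout::getElementContainingOffset).
    size_t Idx = 0;
    for (size_t i = 0; i != T->Offsets.size(); ++i)
      if (T->Offsets[i] <= Offset) Idx = i;
    EltTy = T->Members[Idx];
    EltSize = EltTy->Size;
    Offset -= T->Offsets[Idx];
  } else if (T->Kind == Ty::Array) {
    EltTy = T->Members[0];
    EltSize = EltTy->Size;
    if (Offset >= T->NumElts * EltSize) return false;
    Offset %= EltSize;
  } else {
    return false;                  // scalars have no sub-components
  }
  if (Offset == 0 && (Size == 0 || EltSize == Size)) return true;
  if (Offset + Size > EltSize) return false;  // access straddles elements
  return typeHasComponent(EltTy, Offset, Size);
}

int main() {
  Ty I32 = { Ty::Scalar, 4, std::vector<const Ty*>(),
             std::vector<uint64_t>(), 0 };
  // struct { i32 a; i32 b; } -- members at offsets 0 and 4.
  Ty Pair = { Ty::Struct, 8, std::vector<const Ty*>(2, &I32),
              std::vector<uint64_t>(), 0 };
  Pair.Offsets.push_back(0); Pair.Offsets.push_back(4);

  std::printf("%d %d\n", typeHasComponent(&Pair, 4, 4),   // b: yes -> 1
                         typeHasComponent(&Pair, 2, 4));  // straddles -> 0
  return 0;
}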
+void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + + if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { + RewriteBitCast(BC, AI, Offset, NewElts); + } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { + RewriteGEP(GEPI, AI, Offset, NewElts); + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { + ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); + uint64_t MemSize = Length->getZExtValue(); + if (Offset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); + // Otherwise the intrinsic can only touch a single element and the + // address operand will be updated, so nothing else needs to be done. + } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { + const Type *LIType = LI->getType(); + if (LIType == AI->getAllocatedType()) { + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + Value *Insert = UndefValue::get(LIType); + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Load = new LoadInst(NewElts[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + DeadInsts.push_back(LI); + } else if (isa<IntegerType>(LIType) && + TD->getTypeAllocSize(LIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - return MarkUnsafe(Info); - } else if (isa<DbgInfoIntrinsic>(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. - if (OnlyUsedByDbgInfoIntrinsics(BC)) { - Info.needsCleanup = true; - return; + } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { + Value *Val = SI->getOperand(0); + const Type *SIType = Val->getType(); + if (SIType == AI->getAllocatedType()) { + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, NewElts[i], SI); + } + DeadInsts.push_back(SI); + } else if (isa<IntegerType>(SIType) && + TD->getTypeAllocSize(SIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } - else - MarkUnsafe(Info); } - else { - return MarkUnsafe(Info); - } - if (Info.isUnsafe) return; } } -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes -/// to its first element. Transform users of the cast to use the new values -/// instead. 
-void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, - SmallVector<AllocaInst*, 32> &NewElts) { - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); - while (UI != UE) { - Instruction *User = cast<Instruction>(*UI++); - if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) { - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - if (BCU->use_empty()) BCU->eraseFromParent(); - continue; - } +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced +/// and recursively continue updating all of its uses. +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + RewriteForScalarRepl(BC, AI, Offset, NewElts); + if (BC->getOperand(0) != AI) + return; - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { - // This must be memcpy/memmove/memset of the entire aggregate. - // Split into one per element. - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - continue; - } + // The bitcast references the original alloca. Replace its uses with + // references to the first new element alloca. + Instruction *Val = NewElts[0]; + if (Val->getType() != BC->getDestTy()) { + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); + Val->takeName(BC); + } + BC->replaceAllUsesWith(Val); + DeadInsts.push_back(BC); +} - if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - continue; - } - - // Otherwise it must be some other user of a gep of the first pointer. Just - // leave these alone. - continue; +/// FindElementAndOffset - Return the index of the element containing Offset +/// within the specified type, which must be either a struct or an array. +/// Sets T to the type of the element and Offset to the offset within that +/// element. IdxTy is set to the type of the index result to be used in a +/// GEP instruction. +uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy) { + uint64_t Idx = 0; + if (const StructType *ST = dyn_cast<StructType>(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + Idx = Layout->getElementContainingOffset(Offset); + T = ST->getContainedType(Idx); + Offset -= Layout->getElementOffset(Idx); + IdxTy = Type::getInt32Ty(T->getContext()); + return Idx; } + const ArrayType *AT = cast<ArrayType>(T); + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = Offset / EltSize; + Offset -= Idx * EltSize; + IdxTy = Type::getInt64Ty(T->getContext()); + return Idx; +} + +/// RewriteGEP - Check if this GEP instruction moves the pointer across +/// elements of the alloca that are being split apart, and if so, rewrite +/// the GEP to be relative to the new element. 
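FindElementAndOffset maps a byte offset to an (element index, offset within that element) pair one level at a time; RewriteGEP then calls it in a loop to rebuild the index list of a GEP against the new per-element alloca. A sketch of that drill-down for nested arrays only (the struct case would consult the layout table instead of dividing; the type model and the findIndices name are mine):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy layout: an array of NumElts elements of EltSize bytes each, with an
// optional nested element type. Sketches only the array half of
// FindElementAndOffset; structs use explicit member offsets instead.
struct ArrayTy {
  uint64_t NumElts;
  uint64_t EltSize;          // allocated size of one element
  const ArrayTy *EltTy;      // nested array, or 0 for a scalar element
};

// Emit one index per nesting level until the byte offset is consumed --
// the same drill-down RewriteGEP performs when rebuilding a GEP.
static void findIndices(const ArrayTy *T, uint64_t Offset,
                        std::vector<uint64_t> &Indices) {
  while (T && Offset != 0) {
    uint64_t Idx = Offset / T->EltSize;  // element containing Offset
    Offset -= Idx * T->EltSize;          // remaining offset inside it
    Indices.push_back(Idx);
    T = T->EltTy;
  }
}

int main() {
  // [4 x [8 x i32]]: inner rows are 32 bytes, i32 elements are 4 bytes.
  ArrayTy Inner = { 8, 4, 0 };
  ArrayTy Outer = { 4, 32, &Inner };

  std::vector<uint64_t> Indices;
  findIndices(&Outer, 44, Indices);      // byte 44 == 1*32 + 3*4 -> [1][3]
  for (size_t i = 0; i != Indices.size(); ++i)
    std::printf("[%llu]", (unsigned long long)Indices[i]);
  std::printf("\n");
  return 0;
}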
+void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + uint64_t OldOffset = Offset; + SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); + + const Type *T = AI->getAllocatedType(); + const Type *IdxTy; + uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); + if (GEPI->getOperand(0) == AI) + OldIdx = ~0ULL; // Force the GEP to be rewritten. + + T = AI->getAllocatedType(); + uint64_t EltOffset = Offset; + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + + // If this GEP does not move the pointer across elements of the alloca + // being split, then it does not need to be rewritten. + if (Idx == OldIdx) + return; + + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + SmallVector<Value*, 8> NewArgs; + NewArgs.push_back(Constant::getNullValue(i32Ty)); + while (EltOffset != 0) { + uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); + NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); + } + Instruction *Val = NewElts[Idx]; + if (NewArgs.size() > 1) { + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), + NewArgs.end(), "", GEPI); + Val->takeName(GEPI); + } + if (Val->getType() != GEPI->getType()) + Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); + GEPI->replaceAllUsesWith(Val); + DeadInsts.push_back(GEPI); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For @@ -761,28 +720,41 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy - if (BCInst == MTI->getRawDest()) + if (Inst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(BCInst == MTI->getRawSource()); + assert(Inst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } - // Keep track of the other intrinsic argument, so it can be removed if it - // is dead when the intrinsic is replaced. - Value *PossiblyDead = OtherPtr; - // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - // It is likely that OtherPtr is a bitcast, if so, remove it. - if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) - OtherPtr = BC->getOperand(0); - // All zero GEPs are effectively bitcasts. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) - if (GEP->hasAllZeroIndices()) - OtherPtr = GEP->getOperand(0); + + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an + // optimization, but it's also required to detect the corner case where + // both pointer operands are referencing the same memory, and where + // OtherPtr may be a bitcast or GEP that is currently being rewritten.
(This + // function is only called for mem intrinsics that access the whole + // aggregate, so non-zero GEPs are not an issue here.) + while (1) { + if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) { + OtherPtr = BC->getOperand(0); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) { + // All zero GEPs are effectively bitcasts. + if (GEP->hasAllZeroIndices()) { + OtherPtr = GEP->getOperand(0); + continue; + } + } + break; + } + // If OtherPtr has already been rewritten, this intrinsic will be dead. + if (OtherPtr == NewElts[0]) + return; if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -798,7 +770,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == BCInst; + bool SROADest = MI->getRawDest() == Inst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -807,12 +779,15 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Value *OtherElt = 0; unsigned OtherEltAlign = MemAlignment; - if (OtherPtr) { + if (OtherPtr == AI) { + OtherElt = NewElts[i]; + OtherEltAlign = 0; + } else if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); if (const StructType *ST = @@ -924,9 +899,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - MI->eraseFromParent(); - if (PossiblyDead) - RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); + DeadInsts.push_back(MI); } /// RewriteStoreUserOfWholeAlloca - We found a store of an integer that @@ -937,15 +910,9 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a store of an integer to the whole alloca, it may be a store - // to the first element. Just ignore the store in this case and normal SROA - // will handle it. - if (!isa<IntegerType>(SrcVal->getType()) || - TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) - return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1050,7 +1017,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } } - SI->eraseFromParent(); + DeadInsts.push_back(SI); } /// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to @@ -1059,16 +1026,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. 
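When the intrinsic covers the whole aggregate and the other pointer is unrelated memory, RewriteMemIntrinUserOfAlloca replaces the single call with one copy per new element alloca, each reading or writing at that element's offset within the old aggregate. A runnable miniature with raw byte buffers in place of allocas (the layout values are made up for the example):

#include <cstdio>
#include <cstring>

// One element of the made-up aggregate layout: byte offset plus size.
struct Elt { unsigned Offset; unsigned Size; };

int main() {
  // struct { i32 a; i32 b; } worth of bytes, about to be scalarized.
  unsigned char Src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  unsigned char A0[4], A1[4];            // the new per-element "allocas"
  unsigned char *Elts[2] = { A0, A1 };
  Elt Layout[2] = { {0, 4}, {4, 4} };

  // One memcpy(dst, src, 8) becomes one copy per scalarized element.
  for (int i = 0; i != 2; ++i)
    std::memcpy(Elts[i], Src + Layout[i].Offset, Layout[i].Size);

  std::printf("%d %d\n", A1[0], A1[3]);  // prints "5 8"
  return 0;
}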
- const Type *AllocaEltTy = AI->getType()->getElementType(); + const Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a load of the whole alloca to an integer, it may be a load - // of the first element. Just ignore the load in this case and normal SROA - // will handle it. - if (!isa<IntegerType>(LI->getType()) || - TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) - return; - DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1139,10 +1099,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); LI->replaceAllUsesWith(ResultVal); - LI->eraseFromParent(); + DeadInsts.push_back(LI); } - /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1192,14 +1151,10 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // the users are safe to transform. AllocaInfo Info; - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); - I != E; ++I) { - isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " - << **I << '\n'); - return 0; - } + isSafeForScalarRepl(AI, AI, 0, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << '\n'); + return 0; } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1208,88 +1163,28 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getType()->getElementType(), *TD)) + HasPadding(AI->getAllocatedType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. return Info.needsCleanup ? 1 : 3; } -/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP -/// is canonicalized here. -void SROA::CleanupGEP(GetElementPtrInst *GEPI) { - gep_type_iterator I = gep_type_begin(GEPI); - ++I; - - const ArrayType *AT = dyn_cast<ArrayType>(*I); - if (!AT) - return; - - uint64_t NumElements = AT->getNumElements(); - - if (isa<ConstantInt>(I.getOperand())) - return; - - if (NumElements == 1) { - GEPI->setOperand(2, - Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()))); - return; - } - - assert(NumElements == 2 && "Unhandled case!"); - // All users of the GEP must be loads. At each use of the GEP, insert - // two loads of the appropriate indexed GEP and select between them. - Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(), - Constant::getNullValue(I.getOperand()->getType()), - "isone"); - // Insert the new GEP instructions, which are properly indexed. - SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0", GEPI); - Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); - // Replace all loads of the variable index GEP with loads from both - // indexes and a select. 
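The deleted CleanupGEP canonicalized a variable index into a two-element array as two constant-index loads plus a select, which keeps the array splittable by SROA. A small semantic sketch of the before/after shapes, in plain C++ and under the same assumption the pass made (the index can only be 0 or 1):

#include <cstdio>

// Before: a single load at a variable index into a 2-element array.
static int loadVariable(const int A[2], long I) {
  return A[I];
}

// After CleanupGEP: loads of both fixed indices, selected by "isone",
// so each array slot can become an independent scalar.
static int loadSelected(const int A[2], long I) {
  int V0 = A[0];           // the ".0" GEP
  int V1 = A[1];           // the ".1" GEP
  return I != 0 ? V1 : V0; // the select on "isone"
}

int main() {
  int A[2] = {10, 20};
  for (long I = 0; I != 2; ++I)
    printf("%d %d\n", loadVariable(A, I), loadSelected(A, I));
  return 0;
}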
- while (!GEPI->use_empty()) { - LoadInst *LI = cast<LoadInst>(GEPI->use_back()); - Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); - Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); - Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); - LI->replaceAllUsesWith(R); - LI->eraseFromParent(); - } - GEPI->eraseFromParent(); -} - - /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(AllocaInst *AI) { - // At this point, we know that the end result will be SROA'd and promoted, so - // we can insert ugly code if required so long as sroa+mem2reg will clean it - // up. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); +void SROA::CleanupAllocaUsers(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { User *U = *UI++; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) - CleanupGEP(GEPI); - else { - Instruction *I = cast<Instruction>(U); - SmallVector<DbgInfoIntrinsic *, 2> DbgInUses; - if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { - // Safe to remove debug info uses. - while (!DbgInUses.empty()) { - DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); - DI->eraseFromParent(); - } - I->eraseFromParent(); + Instruction *I = cast<Instruction>(U); + SmallVector<DbgInfoIntrinsic *, 2> DbgInUses; + if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { + // Safe to remove debug info uses. + while (!DbgInUses.empty()) { + DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); + DI->eraseFromParent(); } + I->eraseFromParent(); } } } @@ -1395,7 +1290,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1457,7 +1352,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); @@ -1478,13 +1373,16 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); - // FIXME: Remove once builder has Twine API. - Value *Old = Builder.CreateLoad(NewAI, - (NewAI->getName()+".in").str().c_str()); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); + + // If the load we just inserted is now dead, then the inserted store + // overwrote the entire thing. 
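The store and memset paths of ConvertUsesToScalar follow a read-modify-write shape: load the whole scalarized value, splice the new bits in with ConvertScalar_InsertValue, store it back, and (per the hunks around this point) erase the load when it ends up unused. A standalone sketch of the splice arithmetic, assuming a 64-bit value, little-endian bit numbering, and illustrative names:

#include <cstdint>
#include <cstdio>

// Replace BitWidth bits of Old at BitOffset with New, as the emitted
// load / insert / store sequence does at the IR level.
static uint64_t insertBits(uint64_t Old, uint64_t New,
                           unsigned BitOffset, unsigned BitWidth) {
  uint64_t Mask = BitWidth == 64 ? ~0ULL
                                 : ((1ULL << BitWidth) - 1) << BitOffset;
  return (Old & ~Mask) | ((New << BitOffset) & Mask);
}

int main() {
  uint64_t Whole = 0x1122334455667788ULL;
  // Overwrite the 16-bit field at bit offset 32.
  printf("0x%016llx\n",
         (unsigned long long)insertBits(Whole, 0xBEEF, 32, 16));
  // With BitOffset == 0 and BitWidth == 64 nothing of Old survives, which
  // mirrors the case where the freshly inserted load is dead and erased.
  return 0;
}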
+      if (Old->use_empty())
+        Old->eraseFromParent();
      continue;
    }
@@ -1504,13 +1402,16 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
      for (unsigned i = 1; i != NumBytes; ++i)
        APVal |= APVal << 8;

-      // FIXME: Remove once builder has Twine API.
-      Value *Old = Builder.CreateLoad(NewAI,
-                                      (NewAI->getName()+".in").str().c_str());
+      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(
                      ConstantInt::get(User->getContext(), APVal),
                      Old, Offset, Builder);
      Builder.CreateStore(New, NewAI);
+
+      // If the load we just inserted is now dead, then the memset overwrote
+      // the entire thing.
+      if (Old->use_empty())
+        Old->eraseFromParent();
    }
    MSI->eraseFromParent();
    continue;
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index e905952..a36da78 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -189,6 +189,77 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) {
  return true;
}

+/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi
+/// node) return block, merge them together to promote recursive block merging.
+static bool MergeEmptyReturnBlocks(Function &F) {
+  bool Changed = false;
+
+  BasicBlock *RetBlock = 0;
+
+  // Scan all the blocks in the function, looking for empty return blocks.
+  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+    BasicBlock &BB = *BBI++;
+
+    // Only look at return blocks.
+    ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (Ret == 0) continue;
+
+    // Only look at the block if it is empty or the only other thing in it is a
+    // single PHI node that is the operand to the return.
+    if (Ret != &BB.front()) {
+      // Check for something else in the block.
+      BasicBlock::iterator I = Ret;
+      --I;
+      if (!isa<PHINode>(I) || I != BB.begin() ||
+          Ret->getNumOperands() == 0 ||
+          Ret->getOperand(0) != I)
+        continue;
+    }
+
+    // If this is the first returning block, remember it and keep going.
+    if (RetBlock == 0) {
+      RetBlock = &BB;
+      continue;
+    }
+
+    // Otherwise, we found a duplicate return block.  Merge the two.
+    Changed = true;
+
+    // The case where there is no input to the return, or where the returned
+    // values agree, is trivial.  Note that they can't agree if there are phis
+    // in the blocks.
+    if (Ret->getNumOperands() == 0 ||
+        Ret->getOperand(0) ==
+          cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+      BB.replaceAllUsesWith(RetBlock);
+      BB.eraseFromParent();
+      continue;
+    }
+
+    // If the canonical return block has no PHI node, create one now.
+    PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+    if (RetBlockPHI == 0) {
+      Value *InVal = cast<ReturnInst>(RetBlock->begin())->getOperand(0);
+      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
+                                    &RetBlock->front());
+
+      for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock);
+           PI != E; ++PI)
+        RetBlockPHI->addIncoming(InVal, *PI);
+      RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+    }
+
+    // Turn BB into a block that just unconditionally branches to the return
+    // block.  This handles the case when the two return blocks have a common
+    // predecessor but return different things.
+ RetBlockPHI->addIncoming(Ret->getOperand(0), &BB); + BB.getTerminator()->eraseFromParent(); + BranchInst::Create(RetBlock, &BB); + } + + return Changed; +} + /// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool IterativeSimplifyCFG(Function &F) { @@ -216,6 +287,7 @@ static bool IterativeSimplifyCFG(Function &F) { // bool CFGSimplifyPass::runOnFunction(Function &F) { bool EverChanged = RemoveUnreachableBlocksFromFn(F); + EverChanged |= MergeEmptyReturnBlocks(F); EverChanged |= IterativeSimplifyCFG(F); // If neither pass changed anything, we're done. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 6fd884b..3c28ad2 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -76,6 +76,11 @@ public: /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); + /// EmitStrChr - Emit a call to the strchr function to the builder, for the + /// specified pointer and character. Ptr is required to be some pointer type, + /// and the return value has 'i8*' type. + Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -151,6 +156,26 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { return CI; } +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { + Module *M = Caller->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + const Type *I8Ptr = Type::getInt8PtrTy(*Context); + const Type *I32Ty = Type::getInt32Ty(*Context); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, @@ -880,17 +905,16 @@ struct StrLenOpt : public LibCallOptimization { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len-1); - // Handle strlen(p) != 0. - if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0; - // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 - return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + if (IsOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; } }; //===---------------------------------------===// -// 'strto*' Optimizations +// 'strto*' Optimizations. This handles strtol, strtod, strtof, strtoul, etc. 
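The StrLenOpt hunk above now performs the load fold only when the strlen result feeds a comparison against zero. The identity it relies on, checked in plain C++: a C string is empty exactly when its first byte is NUL, so the O(n) library call can become a single one-byte load.

#include <cstdio>
#include <cstring>

static bool emptyViaStrlen(const char *S) { return strlen(S) == 0; }
static bool emptyViaLoad(const char *S) { return *S == 0; } // folded form

int main() {
  const char *Tests[] = {"", "a", "hello"};
  for (unsigned I = 0; I != 3; ++I)
    printf("%d %d\n", emptyViaStrlen(Tests[I]), emptyViaLoad(Tests[I]));
  return 0;
}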
struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { @@ -910,6 +934,52 @@ struct StrToOpt : public LibCallOptimization { } }; +//===---------------------------------------===// +// 'strstr' Optimizations + +struct StrStrOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !isa<PointerType>(FT->getParamType(0)) || + !isa<PointerType>(FT->getParamType(1)) || + !isa<PointerType>(FT->getReturnType())) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getOperand(1) == CI->getOperand(2)) + return B.CreateBitCast(CI->getOperand(1), CI->getType()); + + // See if either input string is a constant string. + std::string SearchStr, ToFindStr; + bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getOperand(1), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == std::string::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getOperand(1), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + CI->getType()); + return 0; + } +}; + //===---------------------------------------===// // 'memcmp' Optimizations @@ -941,19 +1011,6 @@ struct MemCmpOpt : public LibCallOptimization { return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType()); } - // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0 - // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0 - if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) { - const Type *PTy = PointerType::getUnqual(Len == 2 ? - Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context)); - LHS = B.CreateBitCast(LHS, PTy, "tmp"); - RHS = B.CreateBitCast(RHS, PTy, "tmp"); - LoadInst *LHSV = B.CreateLoad(LHS, "lhsv"); - LoadInst *RHSV = B.CreateLoad(RHS, "rhsv"); - LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads. 
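The strstr folds added above lean on simple libc identities; the following plain C++ snippet checks each of them (the values are only there to exercise the rewrites, the pass itself works on IR):

#include <cstdio>
#include <cstring>
#include <string>

int main() {
  const char *X = "abcd";

  printf("%d\n", strstr(X, X) == X);                // strstr(x, x) -> x
  printf("%d\n", strstr(X, "") == X);               // strstr(x, "") -> x
  printf("%d\n", strstr(X, "c") == strchr(X, 'c')); // one-char needle -> strchr

  // Both operands constant: fold to an offset into the searched string,
  // computed the same way the pass does (std::string::find), or to null
  // when the needle is absent.
  std::string::size_type Off = std::string("abcd").find("bc");
  printf("%d\n", strstr(X, "bc") == X + Off);
  printf("%d\n", strstr(X, "zz") == 0);
  return 0;
}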
- return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType()); - } - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) std::string LHSStr, RHSStr; if (GetConstantStringInfo(LHS, LHSStr) && @@ -1051,7 +1108,7 @@ struct SizeOpt : public LibCallOptimization { const Type *Ty = Callee->getFunctionType()->getReturnType(); - if (Const->getZExtValue() < 2) + if (Const->getZExtValue() == 0) return Constant::getAllOnesValue(Ty); else return ConstantInt::get(Ty, 0); @@ -1071,8 +1128,8 @@ struct MemCpyChkOpt : public LibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) + !isa<IntegerType>(FT->getParamType(3)) || + FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); @@ -1099,7 +1156,7 @@ struct MemSetChkOpt : public LibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || !isa<IntegerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || + !isa<IntegerType>(FT->getParamType(3)) || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; @@ -1129,7 +1186,7 @@ struct MemMoveChkOpt : public LibCallOptimization { if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || !isa<PointerType>(FT->getParamType(0)) || !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || + !isa<IntegerType>(FT->getParamType(3)) || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; @@ -1675,8 +1732,8 @@ namespace { // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; - StrToOpt StrTo; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; - MemSetOpt MemSet; + StrToOpt StrTo; StrStrOpt StrStr; + MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; // Integer Optimizations @@ -1738,6 +1795,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strtoll"] = &StrTo; Optimizations["strtold"] = &StrTo; Optimizations["strtoull"] = &StrTo; + Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; @@ -2644,12 +2702,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // -// strstr: (PR5783) -// * strstr(x,x) -> x -// * strstr(x, "") -> x -// * strstr(x, "a") -> strchr(x, 'a') -// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) -// // tan, tanf, tanl: // * tan(atan(x)) -> x // diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index ccd97c8..19c7206 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -309,10 +309,10 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. 
DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - } else if (TIL->contains(DestLoop->getHeader())) { + } else if (TIL->contains(DestLoop)) { // Edge from an outer loop to an inner loop. Add to the outer loop. TIL->addBasicBlockToLoop(NewBB, LI->getBase()); - } else if (DestLoop->contains(TIL->getHeader())) { + } else if (DestLoop->contains(TIL)) { // Edge from an inner loop to an outer loop. Add to the outer loop. DestLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 162d7b3..c287747 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -21,6 +21,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" @@ -347,8 +348,7 @@ ConstantFoldMappedInstruction(const Instruction *I) { Ops.size(), TD); } -static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD, - LLVMContext &Context) { +static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { DILocation ILoc(InsnMD); if (ILoc.isNull()) return InsnMD; @@ -358,14 +358,15 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD, DILocation OrigLocation = ILoc.getOrigLocation(); MDNode *NewLoc = TheCallMD; if (!OrigLocation.isNull()) - NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context); - - SmallVector<Value *, 4> MDVs; - MDVs.push_back(InsnMD->getElement(0)); // Line - MDVs.push_back(InsnMD->getElement(1)); // Col - MDVs.push_back(InsnMD->getElement(2)); // Scope - MDVs.push_back(NewLoc); - return MDNode::get(Context, MDVs.data(), MDVs.size()); + NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD); + + Value *MDVs[] = { + InsnMD->getOperand(0), // Line + InsnMD->getOperand(1), // Col + InsnMD->getOperand(2), // Scope + NewLoc + }; + return MDNode::get(InsnMD->getContext(), MDVs, 4); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -421,12 +422,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // BasicBlock::iterator I = NewBB->begin(); - LLVMContext &Context = OldFunc->getContext(); - unsigned DbgKind = Context.getMetadata().getMDKind("dbg"); + unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg"); MDNode *TheCallMD = NULL; - SmallVector<Value *, 4> MDVs; if (TheCall && TheCall->hasMetadata()) - TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall); + TheCallMD = TheCall->getMetadata(DbgKind); // Handle PHI nodes specially, as we have to remove references to dead // blocks. @@ -436,32 +435,38 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { if (TheCallMD) { - if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); - Context.getMetadata().addMD(DbgKind, NewMD, I); + if (MDNode *IMD = I->getMetadata(DbgKind)) { + MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); + I->setMetadata(DbgKind, NewMD); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. 
- Context.getMetadata().removeMD(DbgKind, I); + I->setMetadata(DbgKind, 0); } } PHIToResolve.push_back(cast<PHINode>(OldI)); } } + // FIXME: + // FIXME: + // FIXME: Unclone all this metadata stuff. + // FIXME: + // FIXME: + // Otherwise, remap the rest of the instructions normally. for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { if (TheCallMD) { - if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { - MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); - Context.getMetadata().addMD(DbgKind, NewMD, I); + if (MDNode *IMD = I->getMetadata(DbgKind)) { + MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD); + I->setMetadata(DbgKind, NewMD); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. - Context.getMetadata().removeMD(DbgKind, I); + I->setMetadata(DbgKind, 0); } } RemapInstruction(I, ValueMap); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 7a37aa3..2426e3e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,10 +20,9 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" @@ -31,6 +30,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 690972d..7fcc5f7 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -109,7 +109,7 @@ X("loopsimplify", "Canonicalize natural loops", true); const PassInfo *const llvm::LoopSimplifyID = &X; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } -/// runOnFunction - Run down all loops in the CFG (recursively, but we could do +/// runOnLoop - Run down all loops in the CFG (recursively, but we could do /// it in any convenient order) inserting preheaders... /// bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { @@ -305,12 +305,6 @@ ReprocessLoop: } } - // If there are duplicate phi nodes (for example, from loop rotation), - // get rid of them. 
- for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); - BB != E; ++BB) - EliminateDuplicatePHINodes(*BB); - return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 6232f32..6b2c591 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -194,7 +194,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) OrigPHINode.push_back(PN); if (Instruction *I = dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock))) - if (L->contains(I->getParent())) + if (L->contains(I)) LastValueMap[I] = I; } @@ -222,7 +222,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast<Instruction>(InVal)) - if (It > 1 && L->contains(InValI->getParent())) + if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; ValueMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); @@ -244,7 +244,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) UI != UE;) { Instruction *UseInst = cast<Instruction>(*UI); ++UI; - if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) { + if (isa<PHINode>(UseInst) && !L->contains(UseInst)) { PHINode *phi = cast<PHINode>(UseInst); Value *Incoming = phi->getIncomingValueForBlock(*BB); phi->addIncoming(Incoming, New); @@ -295,7 +295,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // If this value was defined in the loop, take the value defined by the // last iteration of the loop. if (Instruction *InValI = dyn_cast<Instruction>(InVal)) { - if (L->contains(InValI->getParent())) + if (L->contains(InValI)) InVal = LastValueMap[InVal]; } PN->addIncoming(InVal, LastIterationBB); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index ba41bf9..9881b3c 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -149,7 +149,29 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (SingularValue != 0) return SingularValue; - // Otherwise, we do need a PHI: insert one now. + // Otherwise, we do need a PHI: check to see if we already have one available + // in this block that produces the right value. + if (isa<PHINode>(BB->begin())) { + DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), + PredValues.end()); + PHINode *SomePHI; + for (BasicBlock::iterator It = BB->begin(); + (SomePHI = dyn_cast<PHINode>(It)); ++It) { + // Scan this phi to see if it is what we need. + bool Equal = true; + for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) + if (ValueMapping[SomePHI->getIncomingBlock(i)] != + SomePHI->getIncomingValue(i)) { + Equal = false; + break; + } + + if (Equal) + return SomePHI; + } + } + + // Ok, we have no way out, insert a new one now. PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), &BB->front()); @@ -198,7 +220,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { // Query AvailableVals by doing an insertion of null. std::pair<AvailableValsTy::iterator, bool> InsertRes = - AvailableVals.insert(std::make_pair(BB, WeakVH())); + AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>())); // Handle the case when the insertion fails because we have already seen BB. 
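The GetValueInMiddleOfBlock change above avoids inserting a duplicate PHI by first scanning the block's existing PHIs for one whose incoming (predecessor, value) pairs already match what would be inserted. A toy sketch of that scan in plain C++, with integer ids standing in for BasicBlock* and Value* (names are illustrative):

#include <cstdio>
#include <map>
#include <vector>

typedef std::vector<std::pair<int, int> > ToyPHI; // (pred id, value id)

// Return an existing PHI whose incoming pairs all agree with PredValues,
// or null so the caller creates a fresh one, as the code above does.
static const ToyPHI *findReusablePHI(const std::vector<ToyPHI> &BlockPHIs,
                                     const std::map<int, int> &PredValues) {
  for (size_t P = 0; P != BlockPHIs.size(); ++P) {
    bool Equal = true;
    for (size_t I = 0; I != BlockPHIs[P].size(); ++I) {
      std::map<int, int>::const_iterator It =
          PredValues.find(BlockPHIs[P][I].first);
      if (It == PredValues.end() || It->second != BlockPHIs[P][I].second) {
        Equal = false;
        break;
      }
    }
    if (Equal)
      return &BlockPHIs[P];
  }
  return 0;
}

int main() {
  ToyPHI Existing;
  Existing.push_back(std::make_pair(1, 10));
  Existing.push_back(std::make_pair(2, 20));
  std::vector<ToyPHI> BlockPHIs(1, Existing);

  std::map<int, int> PredValues;
  PredValues[1] = 10;
  PredValues[2] = 20;
  printf("reused: %d\n", findReusablePHI(BlockPHIs, PredValues) != 0);
  return 0;
}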
if (!InsertRes.second) {
@@ -214,8 +236,8 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
  // it.  When we get back to the first instance of the recursion we will fill
  // in the PHI node.
  return InsertRes.first->second =
-    PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
-                    &BB->front());
+      PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+                      &BB->front());
}

// Okay, the value isn't in the map and we just inserted a null in the entry
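GetValueAtEndOfBlockInternal guards against CFG cycles by inserting a placeholder into the availability map before recursing, so a query that loops back to the same block finds the placeholder instead of recursing forever; in the real code the placeholder is a PHI node that gets filled in afterwards. A toy sketch of the same memoize-with-placeholder pattern in plain C++ (the graph, the values, and the names are all illustrative):

#include <cstdio>
#include <map>
#include <vector>

static const int Placeholder = -1;

// Cycle-safe memoized walk over predecessors. The insert-before-recurse
// step mirrors the AvailableVals.insert(...) above; if the insertion fails,
// the answer (or the placeholder) is already in the map.
static int valueAtEnd(int BB, const std::vector<std::vector<int> > &Preds,
                      std::map<int, int> &Avail) {
  std::pair<std::map<int, int>::iterator, bool> Ins =
      Avail.insert(std::make_pair(BB, Placeholder));
  if (!Ins.second)
    return Ins.first->second;

  // Toy "computation": the value of a block is the smallest reachable id.
  int V = BB;
  for (size_t I = 0; I != Preds[BB].size(); ++I) {
    int PV = valueAtEnd(Preds[BB][I], Preds, Avail);
    if (PV != Placeholder && PV < V)
      V = PV;
  }
  return Avail[BB] = V;
}

int main() {
  // Blocks 0 and 1 form a cycle; block 2 feeds block 0.
  std::vector<std::vector<int> > Preds(3);
  Preds[0].push_back(1); Preds[0].push_back(2);
  Preds[1].push_back(0);
  std::map<int, int> Avail;
  printf("%d\n", valueAtEnd(0, Preds, Avail));
  return 0;
}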