Diffstat (limited to 'lib/Transforms/Scalar')
 lib/Transforms/Scalar/CodeGenPrepare.cpp           |  26
 lib/Transforms/Scalar/GVN.cpp                      |  15
 lib/Transforms/Scalar/IndVarSimplify.cpp           | 761
 lib/Transforms/Scalar/JumpThreading.cpp            |   9
 lib/Transforms/Scalar/LICM.cpp                     |  29
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp       | 115
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       |  80
 lib/Transforms/Scalar/LoopUnswitch.cpp             |  48
 lib/Transforms/Scalar/MemCpyOptimizer.cpp          |  45
 lib/Transforms/Scalar/SCCP.cpp                     |   2
 lib/Transforms/Scalar/ScalarReplAggregates.cpp     |  54
 lib/Transforms/Scalar/SimplifyCFGPass.cpp          |   3
 lib/Transforms/Scalar/TailRecursionElimination.cpp |   9
 13 files changed, 941 insertions(+), 255 deletions(-)
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 0184390..0af14ed 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -147,7 +147,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!DisableBranchOpts) { MadeChange = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - MadeChange |= ConstantFoldTerminator(BB); + MadeChange |= ConstantFoldTerminator(BB, true); if (MadeChange) ModifiedDT = true; @@ -371,9 +371,11 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); - if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. @@ -548,7 +550,23 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // From here on out we're working with named functions. if (CI->getCalledFunction() == 0) return false; - + + // llvm.dbg.value is far away from the value then iSel may not be able + // handle it properly. iSel will drop llvm.dbg.value if it can not + // find a node corresponding to the value. + if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(CI)) + if (Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue())) + if (!VI->isTerminator() && + (DVI->getParent() != VI->getParent() || DT->dominates(DVI, VI))) { + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(VI->getParent()->getFirstNonPHI()); + else + DVI->insertAfter(VI); + return true; + } + // We'll need TargetData from here on out. const TargetData *TD = TLI ? TLI->getTargetData() : 0; if (!TD) return false; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index efecb97..2515fd1 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -952,12 +952,12 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, IntegerType::get(LoadTy->getContext(), NewLoadSize*8); DestPTy = PointerType::get(DestPTy, cast<PointerType>(PtrVal->getType())->getAddressSpace()); - + Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); LoadInst *NewLoad = Builder.CreateLoad(PtrVal); NewLoad->takeName(SrcVal); NewLoad->setAlignment(SrcVal->getAlignment()); - + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); @@ -1576,6 +1576,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + // Transfer DebugLoc. + NewLoad->setDebugLoc(LI->getDebugLoc()); + // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, NewLoad)); @@ -1604,6 +1607,11 @@ bool GVN::processLoad(LoadInst *L) { if (L->isVolatile()) return false; + if (L->use_empty()) { + markInstructionForDeletion(L); + return true; + } + // ... 
to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); @@ -2099,6 +2107,7 @@ bool GVN::performPRE(Function &F) { PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); + PREInstr->setDebugLoc(CurInst->getDebugLoc()); predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2118,7 +2127,7 @@ bool GVN::performPRE(Function &F) { VN.add(Phi, ValNo); addToLeaderTable(ValNo, Phi, CurrentBlock); - + Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); if (Phi->getType()->isPointerTy()) { // Because we have added a PHI-use of the pointer value, it has now diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 09d569a..04ee7c8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -52,20 +52,30 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumRemoved , "Number of aux indvars removed"); +STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumInserted, "Number of canonical indvars added"); STATISTIC(NumReplaced, "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); +STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); + +// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis +// and referenced here. 
+namespace llvm { + extern bool DisableIVRewrite; +} namespace { class IndVarSimplify : public LoopPass { @@ -73,12 +83,13 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + TargetData *TD; SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID) { + IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -101,15 +112,18 @@ namespace { private: bool isValidRewrite(Value *FromVal, Value *ToVal); - void EliminateIVComparisons(); - void EliminateIVRemainders(); + void SimplifyIVUsers(SCEVExpander &Rewriter); + void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); + void EliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned, + PHINode *IVPhi); void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, - BasicBlock *ExitingBlock, - BranchInst *BI, - SCEVExpander &Rewriter); + PHINode *IndVar, + SCEVExpander &Rewriter); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); @@ -122,7 +136,7 @@ namespace { char IndVarSimplify::ID = 0; INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", - "Canonicalize Induction Variables", false, false) + "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) @@ -130,7 +144,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(IVUsers) INITIALIZE_PASS_END(IndVarSimplify, "indvars", - "Canonicalize Induction Variables", false, false) + "Induction Variable Simplification", false, false) Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); @@ -183,17 +197,23 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { return true; } -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, - const SCEV *BackedgeTakenCount, - PHINode *IndVar, - BasicBlock *ExitingBlock, - BranchInst *BI, - SCEVExpander &Rewriter) { +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + // Special case: If the backedge-taken count is a UDiv, it's very likely a // UDiv that ScalarEvolution produced in order to compute a precise // expression, rather than a UDiv from the user's code. 
If we can't find a @@ -201,23 +221,68 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // rewriting the loop. if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return 0; + if (!OrigCond) return false; const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); if (R != BackedgeTakenCount) { const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); if (L != BackedgeTakenCount) - return 0; + return false; } } + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary once LinearFunctionTestReplace is removed. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); // If the exiting block is not the same as the backedge block, we must compare // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; const SCEV *RHS = BackedgeTakenCount; - if (ExitingBlock == L->getLoopLatch()) { + if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. @@ -240,7 +305,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock); + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { // We have to use the preincremented value... RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, @@ -418,96 +483,519 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { SE->forgetLoop(L); } -void IndVarSimplify::EliminateIVComparisons() { - // Look for ICmp users. 
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { - IVStrideUse &UI = *I; - ICmpInst *ICmp = dyn_cast<ICmpInst>(UI.getUser()); - if (!ICmp) continue; - - bool Swapped = UI.getOperandValToReplace() == ICmp->getOperand(1); - ICmpInst::Predicate Pred = ICmp->getPredicate(); - if (Swapped) Pred = ICmpInst::getSwappedPredicate(Pred); - - // Get the SCEVs for the ICmp operands. - const SCEV *S = IU->getReplacementExpr(UI); - const SCEV *X = SE->getSCEV(ICmp->getOperand(!Swapped)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // If the condition is always true or always false, replace it with - // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) - ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else - continue; +namespace { + // Collect information about induction variables that are used by sign/zero + // extend operations. This information is recorded by CollectExtend and + // provides the input to WidenIV. + struct WideIVInfo { + const Type *WidestNativeType; // Widest integer type created [sz]ext + bool IsSigned; // Was an sext user seen before a zext? + + WideIVInfo() : WidestNativeType(0), IsSigned(false) {} + }; + typedef std::map<PHINode *, WideIVInfo> WideIVMap; +} + +/// CollectExtend - Update information about the induction variable that is +/// extended by this sign or zero extend operation. This is used to determine +/// the final width of the IV before actually widening it. +static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned, + WideIVMap &IVMap, ScalarEvolution *SE, + const TargetData *TD) { + const Type *Ty = Cast->getType(); + uint64_t Width = SE->getTypeSizeInBits(Ty); + if (TD && !TD->isLegalInteger(Width)) + return; - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); - DeadInsts.push_back(ICmp); + WideIVInfo &IVInfo = IVMap[Phi]; + if (!IVInfo.WidestNativeType) { + IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); + IVInfo.IsSigned = IsSigned; + return; } + + // We extend the IV to satisfy the sign of its first user, arbitrarily. + if (IVInfo.IsSigned != IsSigned) + return; + + if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType)) + IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty); } -void IndVarSimplify::EliminateIVRemainders() { - // Look for SRem and URem users. - for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { - IVStrideUse &UI = *I; - BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser()); - if (!Rem) continue; +namespace { +/// WidenIV - The goal of this transform is to remove sign and zero extends +/// without creating any new induction variables. To do this, it creates a new +/// phi of the wider type and redirects all users, either removing extends or +/// inserting truncs whenever we stop propagating the type. 
+/// +class WidenIV { + PHINode *OrigPhi; + const Type *WideType; + bool IsSigned; + + IVUsers *IU; + LoopInfo *LI; + Loop *L; + ScalarEvolution *SE; + DominatorTree *DT; + SmallVectorImpl<WeakVH> &DeadInsts; + + PHINode *WidePhi; + Instruction *WideInc; + const SCEV *WideIncExpr; + + SmallPtrSet<Instruction*,16> Processed; + +public: + WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers, + LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree, + SmallVectorImpl<WeakVH> &DI) : + OrigPhi(PN), + WideType(IVInfo.WidestNativeType), + IsSigned(IVInfo.IsSigned), + IU(IUsers), + LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), + SE(SEv), + DT(DTree), + DeadInsts(DI), + WidePhi(0), + WideInc(0), + WideIncExpr(0) { + assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); + } - bool isSigned = Rem->getOpcode() == Instruction::SRem; - if (!isSigned && Rem->getOpcode() != Instruction::URem) - continue; + bool CreateWideIV(SCEVExpander &Rewriter); - // We're only interested in the case where we know something about - // the numerator. - if (UI.getOperandValToReplace() != Rem->getOperand(0)) - continue; +protected: + Instruction *CloneIVUser(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef); - // Get the SCEVs for the ICmp operands. - const SCEV *S = SE->getSCEV(Rem->getOperand(0)); - const SCEV *X = SE->getSCEV(Rem->getOperand(1)); - - // Simplify unnecessary loops away. - const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); - S = SE->getSCEVAtScope(S, ICmpLoop); - X = SE->getSCEVAtScope(X, ICmpLoop); - - // i % n --> i if i is in [0,n). - if ((!isSigned || SE->isKnownNonNegative(S)) && - SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S, X)) - Rem->replaceAllUsesWith(Rem->getOperand(0)); - else { - // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); - if ((!isSigned || SE->isKnownNonNegative(LessOne)) && - SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - LessOne, X)) { - ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, - Rem->getOperand(0), Rem->getOperand(1), - "tmp"); - SelectInst *Sel = - SelectInst::Create(ICmp, - ConstantInt::get(Rem->getType(), 0), - Rem->getOperand(0), "tmp", Rem); - Rem->replaceAllUsesWith(Sel); - } else + const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + + Instruction *WidenIVUse(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef); +}; +} // anonymous namespace + +/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this +/// loop. IVUsers is treated as a worklist. Each successive simplification may +/// push more users which may themselves be candidates for simplification. +/// +void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { + WideIVMap IVMap; + + // Each round of simplification involves a round of eliminating operations + // followed by a round of widening IVs. A single IVUsers worklist is used + // across all rounds. The inner loop advances the user. If widening exposes + // more uses, then another pass through the outer loop is triggered. 
+ for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) { + for(; I != E; ++I) { + Instruction *UseInst = I->getUser(); + Value *IVOperand = I->getOperandValToReplace(); + + if (DisableIVRewrite) { + if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) { + bool IsSigned = Cast->getOpcode() == Instruction::SExt; + if (IsSigned || Cast->getOpcode() == Instruction::ZExt) { + CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD); + continue; + } + } + } + if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { + EliminateIVComparison(ICmp, IVOperand); continue; + } + if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { + bool IsSigned = Rem->getOpcode() == Instruction::SRem; + if (IsSigned || Rem->getOpcode() == Instruction::URem) { + EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi()); + continue; + } + } + } + for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end(); + I != E; ++I) { + WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts); + if (Widener.CreateWideIV(Rewriter)) + Changed = true; } + } +} - // Inform IVUsers about the new users. - if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) - IU->AddUsersIfInteresting(I); +static Value *getExtend( Value *NarrowOper, const Type *WideType, + bool IsSigned, IRBuilder<> &Builder) { + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); +} - DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); - DeadInsts.push_back(Rem); +/// CloneIVUser - Instantiate a wide operation to replace a narrow +/// operation. This only needs to handle operations that can evaluation to +/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. +Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef) { + unsigned Opcode = NarrowUse->getOpcode(); + switch (Opcode) { + default: + return 0; + case Instruction::Add: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); + + IRBuilder<> Builder(NarrowUse); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know + // anything about the narrow operand yet so must insert a [sz]ext. It is + // probably loop invariant and will be folded or hoisted. If it actually + // comes from a widened IV, it should be removed during a future call to + // WidenIVUse. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : + getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : + getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); + + BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); + BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), + LHS, RHS, + NarrowBO->getName()); + Builder.Insert(WideBO); + if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); + if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); + + return WideBO; } + llvm_unreachable(0); +} + +// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' +// perspective after widening it's type? In other words, can the extend be +// safely hoisted out of the loop with SCEV reducing the value to a recurrence +// on the same loop. 
If so, return the sign or zero extended +// recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { + if (!SE->isSCEVable(NarrowUse->getType())) + return 0; + + const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); + const SCEV *WideExpr = IsSigned ? + SE->getSignExtendExpr(NarrowExpr, WideType) : + SE->getZeroExtendExpr(NarrowExpr, WideType); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return 0; + + return AddRec; +} + +/// HoistStep - Attempt to hoist an IV increment above a potential use. +/// +/// To successfully hoist, two criteria must be met: +/// - IncV operands dominate InsertPos and +/// - InsertPos dominates IncV +/// +/// Meeting the second condition means that we don't need to check all of IncV's +/// existing uses (it's moving up in the domtree). +/// +/// This does not yet recursively hoist the operands, although that would +/// not be difficult. +static bool HoistStep(Instruction *IncV, Instruction *InsertPos, + const DominatorTree *DT) +{ + if (DT->dominates(IncV, InsertPos)) + return true; + + if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + // Attempt to hoist IncV + for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); + OI != OE; ++OI) { + Instruction *OInst = dyn_cast<Instruction>(OI); + if (OInst && !DT->dominates(OInst, InsertPos)) + return false; + } + IncV->moveBefore(InsertPos); + return true; +} + +/// WidenIVUse - Determine whether an individual user of the narrow IV can be +/// widened. If so, return the wide clone of the user. +Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, + Instruction *NarrowDef, + Instruction *WideDef) { + // To be consistent with IVUsers, stop traversing the def-use chain at + // inner-loop phis or post-loop phis. + if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L) + return 0; + + // Handle data flow merges and bizarre phi cycles. + if (!Processed.insert(NarrowUse)) + return 0; + + // Our raison d'etre! Eliminate sign and zero extension. + if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { + Value *NewDef = WideDef; + if (NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(NarrowUse); + NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *NarrowUse << "\n"); + NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); + NewDef = NarrowUse; + } + } + if (NewDef != NarrowUse) { + DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse + << " replaced by " << *WideDef << "\n"); + ++NumElimExt; + NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.push_back(NarrowUse); + } + // Now that the extend is gone, expose it's uses to IVUsers for potential + // further simplification within SimplifyIVUsers. + IU->AddUsersIfInteresting(WideDef, WidePhi); + + // No further widening is needed. The deceased [sz]ext had done it for us. 
+ return 0; + } + const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); + if (!WideAddRec) { + // This user does not evaluate to a recurence after widening, so don't + // follow it. Instead insert a Trunc to kill off the original use, + // eventually isolating the original narrow IV so it can be removed. + IRBuilder<> Builder(NarrowUse); + Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); + NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); + return 0; + } + // Reuse the IV increment that SCEVExpander created as long as it dominates + // NarrowUse. + Instruction *WideUse = 0; + if (WideAddRec == WideIncExpr && HoistStep(WideInc, NarrowUse, DT)) { + WideUse = WideInc; + } + else { + WideUse = CloneIVUser(NarrowUse, NarrowDef, WideDef); + if (!WideUse) + return 0; + } + // GetWideRecurrence ensured that the narrow expression could be extended + // outside the loop without overflow. This suggests that the wide use + // evaluates to the same expression as the extended narrow use, but doesn't + // absolutely guarantee it. Hence the following failsafe check. In rare cases + // where it fails, we simple throw away the newly created wide use. + if (WideAddRec != SE->getSCEV(WideUse)) { + DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse + << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); + DeadInsts.push_back(WideUse); + return 0; + } + + // Returning WideUse pushes it on the worklist. + return WideUse; +} + +/// CreateWideIV - Process a single induction variable. First use the +/// SCEVExpander to create a wide induction variable that evaluates to the same +/// recurrence as the original narrow IV. Then use a worklist to forward +/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all +/// interesting IV users, the narrow IV will be isolated for removal by +/// DeleteDeadPHIs. +/// +/// It would be simpler to delete uses as they are processed, but we must avoid +/// invalidating SCEV expressions. +/// +bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) { + // Is this phi an induction variable? + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); + if (!AddRec) + return false; + + // Widen the induction variable expression. + const SCEV *WideIVExpr = IsSigned ? + SE->getSignExtendExpr(AddRec, WideType) : + SE->getZeroExtendExpr(AddRec, WideType); + + assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && + "Expect the new IV expression to preserve its type"); + + // Can the IV be extended outside the loop without overflow? + AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); + if (!AddRec || AddRec->getLoop() != L) + return false; + + // An AddRec must have loop-invariant operands. Since this AddRec it + // materialized by a loop header phi, the expression cannot have any post-loop + // operands, so they must dominate the loop header. + assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) && + SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) + && "Loop header phi recurrence inputs do not dominate the loop"); + + // The rewriter provides a value for the desired IV expression. This may + // either find an existing phi or materialize a new one. Either way, we + // expect a well-formed cyclic phi-with-increments. i.e. any operand not part + // of the phi-SCC dominates the loop entry. 
+ Instruction *InsertPt = L->getHeader()->begin(); + WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); + + // Remembering the WideIV increment generated by SCEVExpander allows + // WidenIVUse to reuse it when widening the narrow IV's increment. We don't + // employ a general reuse mechanism because the call above is the only call to + // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + WideInc = + cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); + WideIncExpr = SE->getSCEV(WideInc); + } + + DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); + ++NumWidened; + + // Traverse the def-use chain using a worklist starting at the original IV. + assert(Processed.empty() && "expect initial state" ); + + // Each worklist entry has a Narrow def-use link and Wide def. + SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers; + for (Value::use_iterator UI = OrigPhi->use_begin(), + UE = OrigPhi->use_end(); UI != UE; ++UI) { + NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi)); + } + while (!NarrowIVUsers.empty()) { + Use *NarrowDefUse; + Instruction *WideDef; + tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val(); + + // Process a def-use edge. This may replace the use, so don't hold a + // use_iterator across it. + Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get()); + Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser()); + Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef); + + // Follow all def-use edges from the previous narrow use. + if (WideUse) { + for (Value::use_iterator UI = NarrowUse->use_begin(), + UE = NarrowUse->use_end(); UI != UE; ++UI) { + NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse)); + } + } + // WidenIVUse may have removed the def-use edge. + if (NarrowDef->use_empty()) + DeadInsts.push_back(NarrowDef); + } + return true; +} + +void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { + unsigned IVOperIdx = 0; + ICmpInst::Predicate Pred = ICmp->getPredicate(); + if (IVOperand != ICmp->getOperand(0)) { + // Swapped + assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand"); + IVOperIdx = 1; + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(ICmp->getOperand(IVOperIdx)); + const SCEV *X = SE->getSCEV(ICmp->getOperand(1 - IVOperIdx)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(ICmp->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // If the condition is always true or always false, replace it with + // a constant value. + if (SE->isKnownPredicate(Pred, S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); + else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); + else + return; + + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; + Changed = true; + DeadInsts.push_back(ICmp); +} + +void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, + Value *IVOperand, + bool IsSigned, + PHINode *IVPhi) { + // We're only interested in the case where we know something about + // the numerator. + if (IVOperand != Rem->getOperand(0)) + return; + + // Get the SCEVs for the ICmp operands. 
+ const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!IsSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + if (IsSigned && !SE->isKnownNonNegative(LessOne)) + return; + + if (!SE->isKnownPredicate(IsSigned ? + ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) + return; + + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1), + "tmp"); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } + + // Inform IVUsers about the new users. + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I, IVPhi); + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; + Changed = true; + DeadInsts.push_back(Rem); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -526,6 +1014,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + DeadInsts.clear(); Changed = false; @@ -533,11 +1023,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // transform them to use integer recurrences. RewriteNonIntegerIVs(L); - BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE); + if (DisableIVRewrite) + Rewriter.disableCanonicalMode(); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -548,33 +1039,42 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); - // Simplify ICmp IV users. - EliminateIVComparisons(); - - // Simplify SRem and URem IV users. - EliminateIVRemainders(); + // Eliminate redundant IV users. + SimplifyIVUsers(Rewriter); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; bool NeedCannIV = false; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { - LargestType = BackedgeTakenCount->getType(); - LargestType = SE->getEffectiveSCEVType(LargestType); + bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); + if (ExpandBECount) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. 
- if (ExitingBlock) - NeedCannIV = true; - } - for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { - const Type *Ty = - SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); + NeedCannIV = true; + const Type *Ty = BackedgeTakenCount->getType(); + if (DisableIVRewrite) { + // In this mode, SimplifyIVUsers may have already widened the IV used by + // the backedge test and inserted a Trunc on the compare's operand. Get + // the wider type to avoid creating a redundant narrow IV only used by the + // loop test. + LargestType = getBackedgeIVType(L); + } if (!LargestType || SE->getTypeSizeInBits(Ty) > + SE->getTypeSizeInBits(LargestType)) + LargestType = SE->getEffectiveSCEVType(Ty); + } + if (!DisableIVRewrite) { + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + NeedCannIV = true; + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); + if (!LargestType || + SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) - LargestType = Ty; - NeedCannIV = true; + LargestType = Ty; + } } // Now that we know the largest of the induction variable expressions @@ -614,19 +1114,17 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && - !BackedgeTakenCount->isZero() && - ExitingBlock) { + if (ExpandBECount) { + assert(canExpandBackedgeTakenCount(L, SE) && + "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); - // Can't rewrite non-branch yet. - if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator())) - NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - ExitingBlock, BI, Rewriter); + NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + Rewriter); } - // Rewrite IV-derived expressions. - RewriteIVExpressions(L, Rewriter); + if (!DisableIVRewrite) + RewriteIVExpressions(L, Rewriter); // Clear the rewriter cache, because values that are in the rewriter's cache // can be deleted in the loop below, causing the AssertingVH in the cache to @@ -649,7 +1147,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // For completeness, inform IVUsers of the IV use in the newly-created // loop exit test instruction. if (NewICmp) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0))); + IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)), + IndVar); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); @@ -1080,5 +1579,5 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { } // Add a new IVUsers entry for the newly-created integer PHI. 
- IU->AddUsersIfInteresting(NewPHI); + IU->AddUsersIfInteresting(NewPHI, NewPHI); } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 7168177..cf18ff0 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -706,7 +706,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { DEBUG(dbgs() << " In block '" << BB->getName() << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; - ConstantFoldTerminator(BB); + ConstantFoldTerminator(BB, true); return true; } @@ -929,9 +929,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (UnavailablePred) { assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 && "Can't handle critical edge here!"); - Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false, + LoadInst *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false, LI->getAlignment(), UnavailablePred->getTerminator()); + NewVal->setDebugLoc(LI->getDebugLoc()); AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } @@ -944,6 +945,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", LoadBB->begin()); PN->takeName(LI); + PN->setDebugLoc(LI->getDebugLoc()); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. @@ -1375,7 +1377,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We didn't copy the terminator from BB over to NewBB, because there is now // an unconditional jump to SuccBB. Insert the unconditional jump. - BranchInst::Create(SuccBB, NewBB); + BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB); + NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc()); // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 93de9cf..13bd022 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -372,7 +372,11 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { return !pointerInvalidatedByLoop(LI->getOperand(0), Size, LI->getMetadata(LLVMContext::MD_tbaa)); } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { - // Handle obvious cases efficiently. + // Don't sink or hoist dbg info; it's legal, but not useful. + if (isa<DbgInfoIntrinsic>(I)) + return false; + + // Handle simple cases by querying alias analysis. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) return true; @@ -445,8 +449,7 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!isa<DbgInfoIntrinsic>(I) && - !DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. 
@@ -602,13 +605,15 @@ namespace { SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; AliasSetTracker &AST; + DebugLoc DL; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast) - : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), - LoopExitBlocks(LEB), AST(ast) {} + SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, + DebugLoc dl) + : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP), + PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -629,7 +634,8 @@ namespace { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); - new StoreInst(LiveInValue, SomePtr, InsertPos); + StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + NewSI->setDebugLoc(DL); } } @@ -727,6 +733,12 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Changed = true; ++NumPromoted; + // Grab a debug location for the inserted loads/stores; given that the + // inserted loads/stores have little relation to the original loads/stores, + // this code just arbitrarily picks a location from one, since any debug + // location is better than none. + DebugLoc DL = LoopUses[0]->getDebugLoc(); + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -734,13 +746,14 @@ void LICM::PromoteAliasSet(AliasSet &AS) { SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST); + *CurAST, DL); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); + PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 1366231..dbf6eec 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -128,11 +128,11 @@ INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } -/// DeleteDeadInstruction - Delete this instruction. Before we do, go through +/// deleteDeadInstruction - Delete this instruction. Before we do, go through /// and zero out all the operands of this instruction. If any of them become /// dead, delete them and the computation tree that feeds them. /// -static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { +static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; NowDeadInsts.push_back(I); @@ -162,6 +162,14 @@ static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { } while (!NowDeadInsts.empty()); } +/// deleteIfDeadInstruction - If the specified value is a dead instruction, +/// delete it and any recursively used instructions. 
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { + if (Instruction *I = dyn_cast<Instruction>(V)) + if (isInstructionTriviallyDead(I)) + deleteDeadInstruction(I, SE); +} + bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; @@ -454,31 +462,35 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return false; } - - // Okay, we have a strided store "p[i]" of a splattable value. We can turn - // this into a memset in the loop preheader now if we want. However, this - // would be unsafe to do if there is anything else in the loop that may read - // or write to the aliased location. Check for an alias. - if (mayLoopAccessLocation(DestPtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) - return false; - - // Okay, everything looks good, insert the memset. - BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE); - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn + // this into a memset in the loop preheader now if we want. However, this + // would be unsafe to do if there is anything else in the loop that may read + // or write to the aliased location. Check for any overlap by generating the + // base pointer and checking the region. unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); + + if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, + CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){ + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(BasePtr, *SE); + return false; + } + + // Okay, everything looks good, insert the memset. + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); @@ -521,7 +533,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - DeleteDeadInstruction(TheStore, *SE); + deleteDeadInstruction(TheStore, *SE); ++NumMemSet; return true; } @@ -539,41 +551,51 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); + // The trip count of the loop and the base pointer of the addrec SCEV is + // guaranteed to be loop invariant, which means that it should dominate the + // header. This allows us to insert code for it in the preheader. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE); + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read - // or write to the stored location (including the load feeding the stores). - // Check for an alias. 
- if (mayLoopAccessLocation(SI->getPointerOperand(), AliasAnalysis::ModRef, + // or write the memory region we're storing to. This includes the load that + // feeds the stores. Check for an alias by generating the base address and + // checking everything. + Value *StoreBasePtr = + Expander.expandCodeFor(StoreEv->getStart(), + Builder.getInt8PtrTy(SI->getPointerAddressSpace()), + Preheader->getTerminator()); + + if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) + getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(StoreBasePtr, *SE); return false; + } // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - if (mayLoopAccessLocation(LI->getPointerOperand(), AliasAnalysis::Mod, - CurLoop, BECount, StoreSize, - getAnalysis<AliasAnalysis>(), SI)) - return false; - - // Okay, everything looks good, insert the memcpy. - BasicBlock *Preheader = CurLoop->getLoopPreheader(); - - IRBuilder<> Builder(Preheader->getTerminator()); - - // The trip count of the loop and the base pointer of the addrec SCEV is - // guaranteed to be loop invariant, which means that it should dominate the - // header. Just insert code for it in the preheader. - SCEVExpander Expander(*SE); - Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = - Expander.expandCodeFor(StoreEv->getStart(), - Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + + if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, + StoreSize, getAnalysis<AliasAnalysis>(), SI)) { + Expander.clear(); + // If we generated new code for the base pointer, clean up. + deleteIfDeadInstruction(LoadBasePtr, *SE); + deleteIfDeadInstruction(StoreBasePtr, *SE); + return false; + } + + // Okay, everything is safe, we can transform this! + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. @@ -589,18 +611,19 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - Value *NewCall = + CallInst *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); + NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - (void)NewCall; + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. - DeleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5abc790..73ebd61 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -209,7 +209,12 @@ struct Formula { /// when AM.Scale is not zero. const SCEV *ScaledReg; - Formula() : ScaledReg(0) {} + /// UnfoldedOffset - An additional constant offset which added near the + /// use. This requires a temporary register, but the offset itself can + /// live in an add immediate field rather than a register. 
+ int64_t UnfoldedOffset; + + Formula() : ScaledReg(0), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -379,6 +384,10 @@ void Formula::print(raw_ostream &OS) const { OS << "<unknown>"; OS << ')'; } + if (UnfoldedOffset != 0) { + if (!First) OS << " + "; else First = false; + OS << "imm(" << UnfoldedOffset << ')'; + } } void Formula::dump() const { @@ -771,8 +780,10 @@ void Cost::RateFormula(const Formula &F, RatePrimaryRegister(BaseReg, Regs, L, SE, DT); } - if (F.BaseRegs.size() > 1) - NumBaseAdds += F.BaseRegs.size() - 1; + // Determine how many (unfolded) adds we'll need inside the loop. + size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + if (NumBaseParts > 1) + NumBaseAdds += NumBaseParts - 1; // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), @@ -1793,7 +1804,8 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! - CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); + CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(), + CondUse->getPhi()); TermBr->replaceUsesOfWith(OldCond, Cond); } } @@ -1945,7 +1957,8 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale) { + F.AM.Scale == OrigF.AM.Scale && + F.UnfoldedOffset == OrigF.UnfoldedOffset) { if (F.AM.BaseOffs == 0) return &LU; // This is the formula where all the registers and symbols matched; @@ -2061,6 +2074,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); if (SE.isLoopInvariant(N, L)) { + // S is normalized, so normalize N before folding it into S + // to keep the result normalized. + N = TransformForPostIncUse(Normalize, N, CI, 0, + LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -2313,8 +2330,29 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, if (InnerSum->isZero()) continue; Formula F = Base; - F.BaseRegs[i] = InnerSum; - F.BaseRegs.push_back(*J); + + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); + if (TLI && InnerSumSC && + SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue(); + F.BaseRegs.erase(F.BaseRegs.begin() + i); + } else + F.BaseRegs[i] = InnerSum; + + // Add J as its own register, or an unfolded immediate. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); + if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like // it. @@ -2482,6 +2520,15 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, continue; } + // Check that multiplying with the unfolded offset doesn't overflow. 
+ if (F.UnfoldedOffset != 0) { + if (F.UnfoldedOffset == INT64_MIN && Factor == -1) + continue; + F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; + if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) + continue; + } + // If we make it here and it's legal, add it. (void)InsertFormula(LU, LUIdx, F); next:; @@ -2664,7 +2711,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // other orig regs. ImmMapTy::const_iterator OtherImms[] = { Imms.begin(), prior(Imms.end()), - Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) + Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) }; for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) { ImmMapTy::const_iterator M = OtherImms[i]; @@ -2738,8 +2785,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { Formula NewF = F; NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) - continue; + LU.Kind, LU.AccessTy, TLI)) { + if (!TLI || + !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + continue; + NewF = F; + NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; + } NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); // If the new formula has a constant in a register, and adding the @@ -3488,6 +3540,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } } + // Expand the unfolded offset portion. + int64_t UnfoldedOffset = F.UnfoldedOffset; + if (UnfoldedOffset != 0) { + // Just add the immediate values. + Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, + UnfoldedOffset))); + } + // Emit instructions summing all the operands. const SCEV *FullS = Ops.empty() ? SE.getConstant(IntTy, 0) : diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index b4e3d31..e05f29c 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -258,6 +258,7 @@ bool LoopUnswitch::processCurrentLoop() { if (LoopCond && SI->getNumCases() > 1) { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! + // FIXME: scan for a case with a non-critical edge? Constant *UnswitchVal = SI->getCaseValue(1); // Do not process same value again and again. if (!UnswitchedVals.insert(UnswitchVal)) @@ -560,6 +561,8 @@ void LoopUnswitch::SplitExitEdges(Loop *L, BasicBlock *ExitBlock = ExitBlocks[i]; SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); + // Although SplitBlockPredecessors doesn't preserve loop-simplify in + // general, if we call it on all predecessors of all exits then it does. SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), ".us-lcssa", this); } @@ -786,8 +789,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, // If this is the edge to the header block for a loop, remove the loop and // promote all subloops. if (Loop *BBLoop = LI->getLoopFor(BB)) { - if (BBLoop->getLoopLatch() == BB) + if (BBLoop->getLoopLatch() == BB) { RemoveLoopFromHierarchy(BBLoop); + if (currentLoop == BBLoop) { + currentLoop = 0; + redoLoop = false; + } + } } // Remove the block from the loop info, which removes it from any loops it @@ -859,7 +867,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, // selects, switches. 
- std::vector<User*> Users(LIC->use_begin(), LIC->use_end()); std::vector<Instruction*> Worklist; LLVMContext &Context = Val->getContext(); @@ -875,13 +882,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), !cast<ConstantInt>(Val)->getZExtValue()); - for (unsigned i = 0, e = Users.size(); i != e; ++i) - if (Instruction *U = cast<Instruction>(Users[i])) { - if (!L->contains(U)) - continue; - U->replaceUsesOfWith(LIC, Replacement); - Worklist.push_back(U); - } + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) + continue; + U->replaceUsesOfWith(LIC, Replacement); + Worklist.push_back(U); + } SimplifyCode(Worklist, L); return; } @@ -889,9 +897,10 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Otherwise, we don't know the precise value of LIC, but we do know that it // is certainly NOT "Val". As such, simplify any uses in the loop that we // can. This case occurs when we unswitch switch statements. - for (unsigned i = 0, e = Users.size(); i != e; ++i) { - Instruction *U = cast<Instruction>(Users[i]); - if (!L->contains(U)) + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); + UI != E; ++UI) { + Instruction *U = dyn_cast<Instruction>(*UI); + if (!U || !L->contains(U)) continue; Worklist.push_back(U); @@ -909,13 +918,22 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Found a dead case value. Don't remove PHI nodes in the // successor if they become single-entry, those PHI nodes may // be in the Users list. - + + BasicBlock *Switch = SI->getParent(); + BasicBlock *SISucc = SI->getSuccessor(DeadCase); + BasicBlock *Latch = L->getLoopLatch(); + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. + // If the DeadCase successor dominates the loop latch, then the + // transformation isn't safe since it will delete the sole predecessor edge + // to the latch. + if (Latch && DT->dominates(SISucc, Latch)) + continue; + // FIXME: This is a hack. We need to keep the successor around // and hooked up so as to preserve the loop structure, because // trying to update it is complicated. So instead we preserve the // loop structure and put the block on a dead code path. - BasicBlock *Switch = SI->getParent(); - SplitEdge(Switch, SI->getSuccessor(DeadCase), this); + SplitEdge(Switch, SISucc, this); // Compute the successors instead of relying on the return value // of SplitEdge, since it may have split the switch successor // after PHI nodes. 
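Note on the GenerateICmpZeroScales hunk in LoopStrengthReduce.cpp above: when a formula carries a nonzero UnfoldedOffset, the patch multiplies it by Factor and then divides the product back to detect wraparound, treating INT64_MIN * -1 as a separate case because that product is not representable. A minimal, self-contained sketch of the same check follows; the helper name and signature are illustrative and not part of the patch.

#include <cstdint>

// Returns true and stores the product when Offset * Factor fits in int64_t.
// Mirrors the divide-back overflow test used in the patch.
static bool scaleOffset(int64_t Offset, int64_t Factor, int64_t &Result) {
  // INT64_MIN * -1 cannot be represented and would defeat the divide-back
  // test, so reject it up front (the patch does the same).
  if (Offset == INT64_MIN && Factor == -1)
    return false;
  // Do the multiply in uint64_t so wraparound is well defined.
  int64_t Scaled = (int64_t)((uint64_t)Offset * (uint64_t)Factor);
  // If the multiply wrapped, dividing back does not reproduce the input.
  if (Factor != 0 && Scaled / Factor != Offset)
    return false;
  Result = Scaled;
  return true;
}

The patch applies the same pattern inline: F.UnfoldedOffset is scaled by Factor, and F.UnfoldedOffset / Factor is compared against Base.UnfoldedOffset to reject overflowing candidates.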
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3035cb..be5aa2e 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" @@ -459,7 +460,10 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); - + + if (!Range.TheStores.empty()) + AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); + // Zap all the stores. for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), @@ -484,11 +488,28 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // a memcpy. if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { if (!LI->isVolatile() && LI->hasOneUse()) { - MemDepResult dep = MD->getDependency(LI); + MemDepResult ldep = MD->getDependency(LI); CallInst *C = 0; - if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst())) - C = dyn_cast<CallInst>(dep.getInst()); - + if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) + C = dyn_cast<CallInst>(ldep.getInst()); + + if (C) { + // Check that nothing touches the dest of the "copy" between + // the call and the store. + MemDepResult sdep = MD->getDependency(SI); + if (!sdep.isNonLocal()) { + bool FoundCall = false; + for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) { + if (&*I == C) { + FoundCall = true; + break; + } + } + if (!FoundCall) + C = 0; + } + } + if (C) { bool changed = performCallSlotOptzn(LI, SI->getPointerOperand()->stripPointerCasts(), @@ -863,12 +884,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) return false; - // Get the alignment of the byval. If it is greater than the memcpy, then we - // can't do the substitution. If the call doesn't specify the alignment, then - // it is some target specific value that we can't know. + // Get the alignment of the byval. If the call doesn't specify the alignment, + // then it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); - if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign) - return false; + if (ByValAlign == 0) return false; + + // If it is greater than the memcpy, then we check to see if we can force the + // source of the memcpy to the alignment we need. If we fail, we bail out. + if (MDep->getAlignment() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) + return false; // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index db8eb85..083412e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -655,7 +655,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Just mark all destinations executable! // TODO: This could be improved if the operand is a [cast of a] BlockAddress. 
- if (isa<IndirectBrInst>(&TI)) + if (isa<IndirectBrInst>(TI)) return true; #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 8178c27..8938b28 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -30,6 +30,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" @@ -341,7 +342,8 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && + (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) { if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); return; @@ -741,8 +743,9 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned FromTypeSize = TD.getTypeAllocSize(FromType); unsigned ToTypeSize = TD.getTypeAllocSize(ToType); - if (ToTypeSize == AllocaSize) { + if (FromTypeSize == ToTypeSize) { // If the two types have the same primitive size, use a bit cast. // Otherwise, it is two vectors with the same element type that has // the same allocation size but different number of elements so use @@ -754,13 +757,13 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return CreateShuffleVectorCast(FromVal, ToType, Builder); } - if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + if (isPowerOf2_64(FromTypeSize / ToTypeSize)) { assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " "of a smaller vector type at a nonzero offset."); const Type *CastElementTy = getScaledElementType(FromType, ToType, ToTypeSize * 8); - unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + unsigned NumCastVectorElements = FromTypeSize / ToTypeSize; LLVMContext &Context = FromVal->getContext(); const Type *CastTy = VectorType::get(CastElementTy, @@ -1051,8 +1054,9 @@ namespace { class AllocaPromoter : public LoadAndStorePromoter { AllocaInst *AI; public: - AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S) - : LoadAndStorePromoter(Insts, S), AI(0) {} + AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + DbgDeclareInst *DD, DIBuilder *&DB) + : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). 
@@ -1329,7 +1333,6 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { return true; } - bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; DominatorTree *DT = 0; @@ -1340,6 +1343,7 @@ bool SROA::performPromotion(Function &F) { bool Changed = false; SmallVector<Instruction*, 64> Insts; + DIBuilder *DIB = 0; while (1) { Allocas.clear(); @@ -1363,8 +1367,11 @@ bool SROA::performPromotion(Function &F) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) Insts.push_back(cast<Instruction>(*UI)); - - AllocaPromoter(Insts, SSA).run(AI, Insts); + + DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); + if (DDI && !DIB) + DIB = new DIBuilder(*AI->getParent()->getParent()->getParent()); + AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts); Insts.clear(); } } @@ -1372,6 +1379,10 @@ bool SROA::performPromotion(Function &F) { Changed = true; } + // FIXME: Is there a better way to handle the lazy initialization of DIB + // so that there doesn't need to be an explicit delete? + delete DIB; + return Changed; } @@ -1831,9 +1842,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) Value *Insert = UndefValue::get(LIType); + IRBuilder<> Builder(LI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + Value *Load = Builder.CreateLoad(NewElts[i], "load"); + Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); } LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); @@ -1858,9 +1870,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // %val.1 = extractvalue { i32, i32 } %val, 1 // store i32 %val.1, i32* %alloc.1 // (Also works for arrays instead of structs) + IRBuilder<> Builder(SI); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, NewElts[i], SI); + Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); + Builder.CreateStore(Extract, NewElts[i]); } DeadInsts.push_back(SI); } else if (SIType->isIntegerTy() && @@ -2481,19 +2494,22 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, } if (CallSite CS = U) { - // If this is a readonly/readnone call site, then we know it is just a - // load and we can ignore it. - if (CS.onlyReadsMemory()) - continue; - // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(UI)) continue; + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || + CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))) + continue; + // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. 
- unsigned ArgNo = CS.getArgumentNo(UI); if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal)) continue; } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 1137c2b..7e9cc80 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -96,6 +96,7 @@ static void ChangeToCall(InvokeInst *II) { NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); + NewCall->setDebugLoc(II->getDebugLoc()); II->replaceAllUsesWith(NewCall); // Follow the call by a branch to the normal destination. @@ -163,7 +164,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, Changed = true; } - Changed |= ConstantFoldTerminator(BB); + Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) Worklist.push_back(*SI); } while (!Worklist.empty()); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 539cc6f..e21eb9d 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -59,6 +59,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" @@ -209,10 +210,10 @@ bool TailCallElim::runOnFunction(Function &F) { } } - // Finally, if this function contains no non-escaping allocas, mark all calls - // in the function as eligible for tail calls (there is no stack memory for - // them to access). - if (!FunctionContainsEscapingAllocas) + // Finally, if this function contains no non-escaping allocas, or calls + // setjmp, mark all calls in the function as eligible for tail calls + //(there is no stack memory for them to access). + if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice()) for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { |
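Note on the TailRecursionElimination change above: besides requiring that no escaping allocas exist, the patch now refuses to mark calls as tail calls when the function calls something that may return twice (setjmp and friends), via F.callsFunctionThatReturnsTwice(). The following is a compilable illustration of the hazard, not the pass itself; Caller, Work, and Env are hypothetical names.

#include <csetjmp>

static std::jmp_buf Env;

static int Work(int N) { return N + 1; }

int Caller(int N) {
  // setjmp may "return twice": once now, and again via a later longjmp.
  if (setjmp(Env) != 0)
    return -1;
  // If this call were emitted as a genuine tail call, Caller's frame (and
  // the context setjmp saved in it) could already be gone when a later
  // longjmp(Env, 1) tries to resume here.
  return Work(N);
}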