Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar')
23 files changed, 1284 insertions, 918 deletions
diff --git a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9536939..0184390 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -47,21 +47,21 @@ using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumBlocksElim, "Number of blocks eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); -static cl::opt<bool> -CriticalEdgeSplit("cgp-critical-edge-splitting", - cl::desc("Split critical edges during codegen prepare"), - cl::init(false), cl::Hidden); +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); namespace { class CodeGenPrepare : public FunctionPass { @@ -76,15 +76,15 @@ namespace { /// update it. BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet<std::pair<const BasicBlock*, const BasicBlock*>, 8> BackEdges; - - // Keeps track of non-local addresses that have been sunk into a block. This - // allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. DenseMap<Value*, Value*> SunkAddrs; + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. + bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -98,10 +98,6 @@ namespace { AU.addPreserved<ProfileInfo>(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -113,7 +109,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool DupRetToEnableTailCallOpts(ReturnInst *RI); }; } @@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. 
-/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges, but only if they are being used to decide which - // critical edges to split. - if (CriticalEdgeSplit) - findLoopBackEdges(F); - bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } SunkAddrs.clear(); + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT) { + if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); @@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. -static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. 
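The new CodeGenPrepare::runOnFunction loop above advances the block iterator before calling OptimizeBlock ("BasicBlock *BB = I++;"), because the optimization may erase or split the block it is visiting. A minimal standalone sketch of the same idiom, with std::list standing in for the function's block list (plain C++, not LLVM code):

#include <cstdio>
#include <list>

// Process each element; processing may erase the current element, so the loop
// copies the iterator and advances it first -- the same shape as
// "BasicBlock *BB = I++;" in the hunk above.
static void processAll(std::list<int> &items) {
  for (std::list<int>::iterator it = items.begin(), end = items.end(); it != end; ) {
    std::list<int>::iterator cur = it++;   // advance before possibly erasing *cur
    if (*cur % 2 == 0)
      items.erase(cur);                    // safe: 'it' already points past it
  }
}

int main() {
  std::list<int> xs = {1, 2, 3, 4, 5};
  processAll(xs);
  for (int x : xs) std::printf("%d ", x);  // prints: 1 3 5
  std::printf("\n");
}

The same copy-then-advance shape recurs later in the patch wherever an iteration may delete the element it is currently looking at.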
- PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. - if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet<std::pair<const BasicBlock*, - const BasicBlock*>, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa<PHINode>(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast<PHINode>(Dest->begin()); - - // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) - if (Invoke->getSuccessor(1) == Dest) - return; - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT); + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return Simplifier.fold(CI, TD); } +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. 
The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. 
+ (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. Value *Consensus = 0; - unsigned NumUses = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { @@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy,MemoryInst, NewAddrModeInsts, *TLI); - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher applicable. - if (!Consensus || NewAddrMode == AddrMode) { - if (V->getNumUses() > NumUses) { + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { Consensus = V; - NumUses = V->getNumUses(); - AddrMode = NewAddrMode; + NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; @@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // If we have no uses, recursively delete the value and all dead instructions + // using it. if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + RecursivelyDeleteTriviallyDeadInstructions(Repl); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
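DupRetToEnableTailCallOpts above requires the call's return attributes to match the caller's, ignoring only noalias, via "(CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias". A self-contained illustration of that bit test; the attribute bit values below are made up for the example and are not LLVM's real encodings:

#include <cassert>

// Hypothetical attribute bits, for illustration only.
enum : unsigned { ZExt = 1u << 0, SExt = 1u << 1, NoAlias = 1u << 2 };

// XOR keeps exactly the bits on which the two sets disagree; clearing the
// NoAlias bit from that difference means a noalias-only mismatch is tolerated.
static bool retAttrsCompatible(unsigned calleeRet, unsigned callerRet) {
  return ((calleeRet ^ callerRet) & ~NoAlias) == 0;
}

int main() {
  assert(retAttrsCompatible(ZExt, ZExt));            // identical attribute sets
  assert(retAttrsCompatible(ZExt | NoAlias, ZExt));  // differ only in noalias
  assert(!retAttrsCompatible(SExt, ZExt));           // sign/zero-extension mismatch
}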
+ CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } ++NumMemoryInsts; return true; @@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast<CallInst>(I)) return OptimizeCallInst(CI); + if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) + return DupRetToEnableTailCallOpts(RI); + return false; } @@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { - bool MadeChange = false; - - // Split all critical edges where the dest block has a PHI. - if (CriticalEdgeSplit) { - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } - } - SunkAddrs.clear(); + bool MadeChange = false; CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index be12973..e275268 100644 --- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp index dbb68f3..8dbcc23 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp @@ -23,7 +23,6 @@ #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" -#include <set> using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 867a06a..53e4640 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t Off1 = 0, Off2 = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. 
if (BP1 != BP2) return false; - - // Otherwise, we might have a situation like: - // store i16 -> P + 1 Byte - // store i32 -> P - // In this case, we see if the later store completely overlaps all bytes - // stored by the previous store. - if (Off1 < Off2 || // Earlier starts before Later. - Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. - return false; - // Otherwise, we have complete overlap. - return true; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. + if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // away the store and we bail out. However, if we depend on on something // that overwrites the memory location we *can* potentially optimize it. // - // Find out what memory location the dependant instruction stores. + // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. @@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AA->doesNotAccessMemory(CS)) continue; - unsigned NumModRef = 0, NumOther = 0; - // If the call might load from any of our allocas, then any store above // the call is live. SmallVector<Value*, 8> LiveAllocas; for (SmallPtrSet<Value*, 16>::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { - // If we detect that our AA is imprecise, it's not worth it to scan the - // rest of the DeadPointers set. Just assume that the AA will return - // ModRef for everything, and go ahead and bail out. - if (NumModRef >= 16 && NumOther == 0) - return MadeChange; - // See if the call site touches it. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - if (A == AliasAnalysis::ModRef) - ++NumModRef; - else - ++NumOther; - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } diff --git a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp b/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp deleted file mode 100644 index 4c3d188..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/GEPSplitter.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This function breaks GEPs with more than 2 non-zero operands into smaller -// GEPs each with no more than 2 non-zero operands. This exposes redundancy -// between GEPs with common initial operand sequences. 
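The rewritten overlap test in DeadStoreElimination's isCompleteOverwrite (earlier in this diff) is careful about signedness: the offsets returned by GetPointerBaseWithConstantOffset are signed while the access sizes are unsigned, so the subtraction is only done once EarlierOff >= LaterOff is established. A standalone recreation of just that arithmetic, with the two pictured cases as checks (plain C++, not the pass's code):

#include <cassert>
#include <cstdint>

// True when the later store [laterOff, laterOff+laterSize) fully covers the
// earlier store [earlierOff, earlierOff+earlierSize); offsets are signed and
// sizes unsigned, mirroring the check added in the DSE hunk.
static bool laterCoversEarlier(int64_t earlierOff, uint64_t earlierSize,
                               int64_t laterOff, uint64_t laterSize) {
  return earlierOff >= laterOff &&
         uint64_t(earlierOff - laterOff) + earlierSize <= laterSize;
}

int main() {
  assert(laterCoversEarlier(0, 4, 0, 4));    // same start, equal size
  assert(laterCoversEarlier(4, 2, 0, 8));    // earlier nested inside later
  assert(!laterCoversEarlier(-2, 4, 0, 8));  // earlier starts before later
  assert(!laterCoversEarlier(6, 4, 0, 8));   // earlier runs past the end of later
}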
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "split-geps" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - class GEPSplitter : public FunctionPass { - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - public: - static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) { - initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char GEPSplitter::ID = 0; -INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false) - -FunctionPass *llvm::createGEPSplitterPass() { - return new GEPSplitter(); -} - -bool GEPSplitter::runOnFunction(Function &F) { - bool Changed = false; - - // Visit each GEP instruction. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; ) - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(II++)) { - unsigned NumOps = GEP->getNumOperands(); - // Ignore GEPs which are already simple. - if (NumOps <= 2) - continue; - bool FirstIndexIsZero = isa<ConstantInt>(GEP->getOperand(1)) && - cast<ConstantInt>(GEP->getOperand(1))->isZero(); - if (NumOps == 3 && FirstIndexIsZero) - continue; - // The first index is special and gets expanded with a 2-operand GEP - // (unless it's zero, in which case we can skip this). - Value *NewGEP = FirstIndexIsZero ? - GEP->getOperand(0) : - GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1), - "tmp", GEP); - // All remaining indices get expanded with a 3-operand GEP with zero - // as the second operand. 
- Value *Idxs[2]; - Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0); - for (unsigned i = 2; i != NumOps; ++i) { - Idxs[1] = GEP->getOperand(i); - NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP); - } - GEP->replaceAllUsesWith(NewGEP); - GEP->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index a0123f5..efecb97 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -63,50 +63,48 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type* type; + const Type *type; SmallVector<uint32_t, 4> varargs; - Expression() { } - Expression(uint32_t o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == ~0U || opcode == ~1U) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (varargs != other.varargs) + if (varargs != other.varargs) return false; return true; } }; class ValueTable { - private: - DenseMap<Value*, uint32_t> valueNumbering; - DenseMap<Expression, uint32_t> expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; - - uint32_t nextValueNumber; - - Expression create_expression(Instruction* I); - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis *getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + DenseMap<Value*, uint32_t> valueNumbering; + DenseMap<Expression, uint32_t> expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; + + uint32_t nextValueNumber; + + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } @@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. 
void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const { namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { - initializeGVNPass(*PassRegistry::getPassRegistry()); - } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const TargetData* TD; - + const TargetData *TD; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -418,17 +407,39 @@ namespace { DenseMap<uint32_t, LeaderTableEntry> LeaderTable; BumpPtrAllocator TableAllocator; + SmallVector<Instruction*, 8> InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { - LeaderTableEntry& Curr = LeaderTable[N]; + LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - LeaderTableEntry* Node = TableAllocator.Allocate<LeaderTableEntry>(); + LeaderTableEntry *Node = TableAllocator.Allocate<LeaderTableEntry>(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; @@ -474,19 +485,17 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<AliasAnalysis>(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl<Instruction*> &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap<uint32_t, Value*>& d); + void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); - bool performPRE(Function& F); + bool performPRE(Function &F); Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; + // If this is already the right type, just return it. const Type *StoredValTy = StoredVal->getType(); uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. 
if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - } // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { @@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, TD); } +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { @@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. +/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, const Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ @@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. 
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); + assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. + IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + const Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast<PointerType>(PtrVal->getType())->getAddressSpace()); + + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, @@ -943,11 +1045,12 @@ struct AvailableValueInBlock { BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. - PointerIntPair<Value *, 1, ValType> Val; + PointerIntPair<Value *, 2, ValType> Val; /// Offset - The byte offset in Val that is interesting for the load query. 
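GetLoadValueForLoad above widens the earlier load to the next power-of-two size that covers both accesses and, on big-endian targets, shifts the widened value down before truncating so the original load's users still see the same bits. A small standalone model of that size and bit arithmetic; nextPow2 here is a local stand-in for the LLVM helper used in the patch:

#include <cassert>
#include <cstdint>

// Round a byte count up to a power of two, as the widening path does overall.
static uint64_t nextPow2(uint64_t v) {
  uint64_t p = 1;
  while (p < v) p <<= 1;
  return p;
}

// Recover the original narrow load's value from the wider load. On little-endian
// targets the original bytes are the low bits, so truncation is enough; on
// big-endian targets they sit in the high bits, hence the shift before the
// truncate -- the same adjustment made with CreateLShr/CreateTrunc above.
static uint32_t narrowFromWide(uint64_t wide, unsigned wideBytes,
                               unsigned narrowBytes, bool bigEndian) {
  if (bigEndian)
    wide >>= (wideBytes - narrowBytes) * 8;
  return uint32_t(wide);   // truncate to the narrow width (4 bytes here)
}

int main() {
  assert(nextPow2(3) == 4 && nextPow2(5) == 8);  // e.g. offset 1 + 4-byte load -> 8
  // A 4-byte load widened to 8 bytes; the original 4 bytes held 0x11223344.
  assert(narrowFromWide(0xAABBCCDD11223344ull, 8, 4, /*bigEndian=*/false) == 0x11223344u);
  assert(narrowFromWide(0x11223344AABBCCDDull, 8, 4, /*bigEndian=*/true)  == 0x11223344u);
}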
unsigned Offset; @@ -972,37 +1075,69 @@ struct AvailableValueInBlock { return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast<LoadInst>(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast<MemIntrinsic>(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. - Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1010,21 +1145,20 @@ struct AvailableValueInBlock { } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. 
SmallVector<PHINode*, 8> NewPHIs; @@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) AA->addEscapingUse(P->getOperandUse(2*ii)); } + } return V; } @@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) { /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector<NonLocalDepResult, 64> Deps; AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); @@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber() && + Deps[0].getResult().getInst()->getParent() == LI->getParent()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. @@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } // Perform PHI construction. 
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; @@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // completely covers this load. This sort of thing can happen in bitfield // access code. Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast<LoadInst>(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
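The clobber handling added to GVN::processLoad above targets the pattern sketched in its comment: a wide store that completely covers a later narrower load ("store i32 ... load i8"). At the source level the shape can look like the snippet below; exactly what IR a compiler emits for it is an assumption, the point is only the wide-store/narrow-load overlap:

#include <cstdio>

// A 4-byte store followed by a 1-byte load of the same memory: the narrow load
// is completely covered by the store, so the value can be forwarded instead of
// treating the store as an opaque clobber.
unsigned lowByte(unsigned *p) {
  *p = 0x11223344u;
  return *reinterpret_cast<unsigned char *>(p);
}

int main() {
  unsigned v = 0;
  std::printf("0x%02x\n", lowByte(&v));   // 0x44 on a little-endian target
}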
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { - if (TD) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { @@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // (depending on its type). if (DepLI->getType() != L->getType()) { if (TD) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl<Instruction*> &toErase) { +bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. if (isa<DbgInfoIntrinsic>(I)) return false; @@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I, I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(I); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - bool Changed = processLoad(LI, toErase); - - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - addToLeaderTable(Num, LI, LI->getParent()); - } + if (processLoad(LI)) + return true; - return Changed; + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; } // For conditions branches, we can perform simple conditional propagation on @@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I, } // Remove it! - VN.erase(I); I->replaceAllUsesWith(repl); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } @@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector<Instruction*, 8> toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector<Instruction*, 4>::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
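performPRE, whose body this part of the GVN diff is adjusting, removes partial redundancies: an expression already available on some predecessors is recomputed in the predecessors where it is missing, and the copies are merged with a phi. A source-level before/after sketch of the intended effect (illustrative C++, not the pass's own code):

// Before: a + b is computed on the 'c' path and again after the join, so the
// second computation is partially redundant.
int before(int a, int b, bool c) {
  int t = 0;
  if (c)
    t = a + b;
  int x = a + b;
  return t + x;
}

// After PRE: the expression is inserted on the path where it was missing and
// the copies are merged (the phi), so no path computes a + b twice.
int after(int a, int b, bool c) {
  int t = 0, x;
  if (c) {
    t = a + b;
    x = t;
  } else {
    x = a + b;
  }
  return t + x;
}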
- PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0fb6798..09d569a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -73,6 +73,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + SmallVector<WeakVH, 16> DeadInsts; bool Changed; public: @@ -98,6 +99,7 @@ namespace { } private: + bool isValidRewrite(Value *FromVal, Value *ToVal); void EliminateIVComparisons(); void EliminateIVRemainders(); @@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. +bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
This pass is able to rewrite the exit tests of any loop where the @@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + DeadInsts.push_back(OrigCond); ++NumLFTR; Changed = true; @@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; - Changed = true; - ++NumReplaced; - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. @@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { } void IndVarSimplify::EliminateIVComparisons() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for ICmp users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); DeadInsts.push_back(ICmp); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } void IndVarSimplify::EliminateIVRemainders() { - SmallVector<WeakVH, 16> DeadInsts; - // Look for SRem and URem users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() { DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); + DeadInsts.clear(); Changed = false; // If there are any floating-point recurrences, attempt to @@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - // Rewrite IV-derived expressions. Clears the rewriter cache. + // Rewrite IV-derived expressions. RewriteIVExpressions(L, Rewriter); + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + // The Rewriter may not be used from this point on. 
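The IndVarSimplify hunk above hoists DeadInsts into the pass and, crucially, drains it only after Rewriter.clear(), so the expander's cache never holds a handle to an already-deleted value. A toy model of that ordering, assuming nothing beyond the standard library (Inst, body, rewriterCache, and deadInsts are made-up stand-ins for the real data structures, with weak_ptr playing the role of WeakVH); the diff picks up again below with the preheader sinking code.

    #include <algorithm>
    #include <iostream>
    #include <memory>
    #include <vector>

    struct Inst {
      int id;
      ~Inst() { std::cout << "deleted instruction " << id << "\n"; }
    };

    int main() {
      std::vector<std::shared_ptr<Inst>> body;            // owns the "instructions"
      for (int i = 0; i < 3; ++i)
        body.push_back(std::make_shared<Inst>(Inst{i}));

      std::vector<std::shared_ptr<Inst>> rewriterCache{body[1]};    // cache keeps %1 alive
      std::vector<std::weak_ptr<Inst>> deadInsts{body[1], body[2]}; // queued deletions

      // Step 1: drop the cache first (the Rewriter.clear() in the hunk above),
      // so nothing in it can ever observe a dangling instruction.
      rewriterCache.clear();

      // Step 2: now it is safe to delete whatever is still around and dead.
      while (!deadInsts.empty()) {
        std::shared_ptr<Inst> I = deadInsts.back().lock();
        deadInsts.pop_back();
        if (!I)
          continue;                                       // already gone
        body.erase(std::remove(body.begin(), body.end(), I), body.end());
      }                                                   // I released here -> freed
      return 0;
    }
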
// Loop-invariant instructions in the preheader that aren't used in the @@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { if (!isSafe(*I, L, SE)) return false; return true; } - + // A cast is safe if its operand is. if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) return isSafe(C->getOperand(), L, SE); @@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - SmallVector<WeakVH, 16> DeadInsts; - // Rewrite all induction variable expressions in terms of the canonical // induction variable. // @@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } // Inform ScalarEvolution that this value is changing. The change doesn't // affect its value, but it does potentially affect which use lists the // value will be on after the replacement, which affects ScalarEvolution's @@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); + ++NumRemoved; Changed = true; // The old value may be dead now. DeadInsts.push_back(Op); } - - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted in the loop below, causing the AssertingVH in the cache to - // trigger. - Rewriter.clear(); - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } /// If there's a single exit block, sink any loop-invariant values that @@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { BinaryOperator *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - + // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); @@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (Compare == 0 || !Compare->hasOneUse() || !isa<BranchInst>(Compare->use_back())) return; - + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); // We need to verify that the branch actually controls the iteration count @@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; - - + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); @@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; - + // Find new predicate for integer comparison. 
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { @@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - + // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. // TODO: We could use i64 if it is native and the range requires it. - + // The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; @@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; - + uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - + } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; - + uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(-IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } - + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. 
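The context above spells out the arithmetic that decides whether a floating-point IV can safely become an i32 IV; the hunk itself continues below with the new integer PHI. As a hedged restatement of the positive-stride checks only (the function name and the simplified predicate enum are inventions of this sketch, not the pass's types):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // Simplified predicate kinds; the real pass works with CmpInst predicates.
    enum Pred { SLT, SLE, EQ_OR_NE };

    // Positive-stride legality check, restated from the comments above.
    bool canUseI32ForPositiveStride(int64_t Init, int64_t Inc, int64_t Exit,
                                    Pred P) {
      auto fitsI32 = [](int64_t V) {
        return V >= std::numeric_limits<int32_t>::min() &&
               V <= std::numeric_limits<int32_t>::max();
      };
      // Start, stride and exit value must all fit in a signed i32.
      if (!fitsI32(Init) || !fitsI32(Inc) || !fitsI32(Exit) || Inc <= 0)
        return false;

      // A positive stride needs Init < Exit (and a "less-than"-style exit test).
      if (Init >= Exit)
        return false;

      uint32_t Range = uint32_t(Exit - Init);
      if (P == SLE && ++Range == 0)
        return false;                   // SLE -> SLT wrapped: infinite loop

      uint32_t Leftover = Range % uint32_t(Inc);

      // Equality exit tests must land exactly on the exit value...
      if (P == EQ_OR_NE && Leftover != 0)
        return false;

      // ...and the final increment must not wrap past the exit in i32.
      if (Leftover != 0 && int32_t(Exit + Inc) < Exit)
        return false;

      return true;
    }

    int main() {
      // for (float f = 0.0f; f < 100.0f; f += 3.0f): SLT exit, fine to convert.
      assert(canUseI32ForPositiveStride(0, 3, 100, SLT));
      // The same bounds with an equality exit would step over 100 and never stop.
      assert(!canUseI32ForPositiveStride(0, 3, 100, EQ_OR_NE));
      return 0;
    }
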
- PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 90094a8..7168177 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" @@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) { Changed = true; continue; } - + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); - + // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. @@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { static bool hasAddressTakenAndUsed(BasicBlock *BB) { if (!BB->hasAddressTaken()) return false; - + // If the block has its address taken, it may be a tree of dead constants // hanging off of it. These shouldn't keep the block alive. BlockAddress *BA = BlockAddress::get(BB); @@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; // Must be an invoke. } + // Run constant folding to see if we can reduce the condition to a simple + // constant. + if (Instruction *I = dyn_cast<Instruction>(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa<UndefValue>(Condition)) { @@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); // Create a PHI node at the start of the block for the PRE'd load value. - PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); PN->takeName(LI); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; - ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp index 0786793..93de9cf 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp @@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!isa<DbgInfoIntrinsic>(I) && + !DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. 
CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. @@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Copy any value stored to or loaded from a must-alias of the pointer. - if (PreheaderLoad->getType()->isPointerTy()) { - Value *SomeValue; - if (LoadInst *LI = dyn_cast<LoadInst>(LoopUses[0])) - SomeValue = LI; - else - SomeValue = cast<StoreInst>(LoopUses[0])->getValueOperand(); - - CurAST->copyValue(SomeValue, PreheaderLoad); - } - // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - - // If the preheader load is itself a pointer, we need to tell alias analysis - // about the new pointer we created in the preheader block and about any PHI - // nodes that just got inserted. - if (PreheaderLoad->getType()->isPointerTy()) { - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(PreheaderLoad, NewPHIs[i]); - } - - // fwew, we're done! + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f8ce214..1366231 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -81,7 +81,7 @@ namespace { bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, @@ -91,7 +91,7 @@ namespace { const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); - + /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. /// @@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector<Instruction*, 32> NowDeadInsts; - + NowDeadInsts.push_back(I); - + // Before we touch this instruction, remove it from SE! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); - + // This instruction is dead, zap it, in stages. Start by removing it from // SCEV. SE.forgetValue(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast<Instruction>(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + } while (!NowDeadInsts.empty()); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - + // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BECount)) return false; - + // If this loop executes exactly one time, then it should be peeled, not // optimized by this pass. if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) if (BECst->getValue()->getValue() == 0) return false; - + // We require target data for now. 
TD = getAnalysisIfAvailable<TargetData>(); if (TD == 0) return false; @@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis<DominatorTree>(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - + SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; @@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; - + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); } return MadeChange; @@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) if (!DT->dominates(BB, ExitBlocks[i])) return false; - + bool MadeChange = false; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; @@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, WeakVH InstPtr(I); if (!processLoopStore(SI, BECount)) continue; MadeChange = true; - + // If processing the store invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) I = BB->begin(); continue; } - + // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { WeakVH InstPtr(I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; - + // If processing the memset invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) @@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, continue; } } - + return MadeChange; } @@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - + // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; + unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1)); - + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. 
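At the source level, the strided-store case that processLoopStore matches above boils down to the following before/after pair; the byte count handed to memset is (BECount + 1) * StoreSize, i.e. the trip count times the size of one store. The two helper functions are purely illustrative.

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Before: the loop shape the pass matches (constant splattable value,
    // stride equal to the store size, so every byte of the range is written).
    void zeroLoop(int32_t *p, std::size_t n) {
      for (std::size_t i = 0; i != n; ++i)
        p[i] = 0;                      // i32 store of a byte-wise value, stride 4
    }

    // After: the equivalent call placed in the loop preheader. StoreSize is 4
    // here, so the byte count is (BECount + 1) * StoreSize == n * 4.
    void zeroIdiom(int32_t *p, std::size_t n) {
      if (n != 0)
        std::memset(p, 0, n * sizeof(int32_t));
    }

    int main() {
      int32_t a[8], b[8];
      zeroLoop(a, 8);
      zeroIdiom(b, 8);
      return std::memcmp(a, b, sizeof(a));   // 0: both forms produce the same bytes
    }
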
@@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; dbgs() << "BB: " << *SI->getParent(); } - + return false; } @@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) return false; - + Value *Pointer = MSI->getDest(); - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); if ((SizeInBytes >> 32) != 0) return false; - + // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1)); - + // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. if (Stride == 0 || MSI->getLength() != Stride->getValue()) return false; - + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, MSI->getAlignment(), MSI->getValue(), MSI, Ev, BECount); @@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; - + // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias // with store of &A[100], we need to StoreLoc to be "A" with size of 100, @@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // that doesn't seem worthwhile. Constant *C = dyn_cast<Constant>(V); if (C == 0) return 0; - + // Only handle simple values that are a power of two bytes in size. uint64_t Size = TD.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) return 0; - + // Don't care enough about darwin/ppc to implement this. if (TD.isBigEndian()) return 0; @@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). if (Size > 16) return 0; - + // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount) { - + // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; - + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. 
if (SplatValue && TLI->has(LibFunc::memset) && @@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // do anything with a 3-byte store, for example. return false; } - - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), TheStore)) return false; - + // Okay, everything looks good, insert the memset. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - + unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace(); - Value *BasePtr = + Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - - Value *NumBytes = + SCEV::FlagNUW); + + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - - Value *NewCall; + + CallInst *NewCall; if (SplatValue) NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), IntPtr, (void*)0); - + // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, @@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } - + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - (void)NewCall; - + NewCall->setDebugLoc(TheStore->getDebugLoc()); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(TheStore, *SE); @@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - + LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); - + // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. 
However, this // would be unsafe to do if there is anything else in the loop that may read @@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis<AliasAnalysis>(), SI)) return false; - + // Okay, everything looks good, insert the memcpy. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - Value *LoadBasePtr = + Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = + Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. const Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - + SCEV::FlagNUW); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - + Value *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); - + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); (void)NewCall; - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(SI, *SE); diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp index 95e1578..47dced3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); - assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (OrigPreheader == 0 || OrigLatch == 0) + return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst::Create(NewHeader, PHBI); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. 
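For the load/store counterpart handled by processLoopStoreOfLoopLoad above, the source-level picture is a plain element-by-element copy becoming a single memcpy in the preheader, with the byte count again computed as (BECount + 1) * StoreSize using no-unsigned-wrap (SCEV::FlagNUW) arithmetic. A hedged sketch, with illustrative function names; the rewrite is only legal because the pass has already proven the two regions cannot alias.

    #include <cstddef>
    #include <cstring>

    // Before: a strided load feeding a strided store of the same width.
    void copyLoop(char *dst, const char *src, std::size_t n) {
      for (std::size_t i = 0; i != n; ++i)
        dst[i] = src[i];
    }

    // After: one call in the preheader; n corresponds to (BECount + 1) * StoreSize
    // with StoreSize == 1. Using memcpy (not memmove) is only correct because the
    // pass has already checked that source and destination do not alias.
    void copyIdiom(char *dst, const char *src, std::size_t n) {
      if (n != 0)
        std::memcpy(dst, src, n);
    }

    int main() {
      char src[16] = "loop idiom", dst1[16] = {0}, dst2[16] = {0};
      copyLoop(dst1, src, sizeof(src));
      copyIdiom(dst2, src, sizeof(src));
      return std::memcmp(dst1, dst2, sizeof(dst1));  // 0: identical results
    }
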
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ac4aea2..5abc790 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L, DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), L, Good, Bad, SE); return; } @@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); if (!Start) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F, } } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() { if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. */ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); @@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. 
bool @@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), C, Ops, L, SE); CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; @@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { } } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80b263a..fef6bc3 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -43,7 +43,13 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -56,7 +62,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + unsigned CurrentCount; unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { @@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - CurrentThreshold = UnrollThreshold; - if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && - UnrollThreshold.getNumOccurrences() == 0) - CurrentThreshold = OptSizeUnrollThreshold; + unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; + unsigned Count = CurrentCount; // Automatically select an unroll count. if (Count == 0) { @@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. 
- if (CurrentThreshold != NoThreshold) { + if (Threshold != NoThreshold) { unsigned NumInlineCandidates; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); @@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > CurrentThreshold) { + if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << CurrentThreshold << "\n"); - if (!UnrollAllowPartial) { + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bde0e53..a3035cb 100644 --- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <list> using namespace llvm; @@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -316,6 +320,7 @@ namespace { AU.addRequired<DominatorTree>(); AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addPreserved<AliasAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>(); } @@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) @@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); @@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. 
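Stepping back to the LoopUnrollPass hunk above (the MemCpyOpt diff resumes below): when the fully-unrolled size exceeds the threshold and partial unrolling is allowed, the count is shrunk to fit and then rounded down until it divides the trip count. A standalone restatement of that selection, wrapped in an invented helper function around the same arithmetic:

    #include <cstdio>

    // Largest count that keeps LoopSize * Count within Threshold, rounded down
    // until it divides the trip count evenly; 0 or 1 means "don't unroll".
    unsigned pickPartialUnrollCount(unsigned Threshold, unsigned LoopSize,
                                    unsigned TripCount) {
      unsigned Count = Threshold / LoopSize;
      while (Count != 0 && TripCount % Count != 0)
        --Count;
      return Count;
    }

    int main() {
      // e.g. a 12-instruction body, trip count 100, threshold 150:
      // 150/12 = 12, rounded down to 10 so that 100 % Count == 0.
      std::printf("%u\n", pickPartialUnrollCount(150, 12, 100));  // prints 10
    }
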
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis<MemoryDependenceAnalysis>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. + if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break; diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp index e093b52..c1dfe15 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,6 +75,8 @@ namespace { class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; DenseMap<AssertingVH<>, unsigned> ValueRankMap; + SmallVector<WeakVH, 8> RedoInsts; + SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -98,7 +101,7 @@ namespace { void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast<Instruction>(V); - if (!Op || !isa<BinaryOperator>(Op) || !Op->use_empty()) + if (!Op || !isa<BinaryOperator>(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg, ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Everyone now refers to the add instruction. ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { // remaining operand. if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Now that we have inserted a multiply, optimize it. 
This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast<BinaryOperator>(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa<ConstantInt>(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa<ConstantInt>(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; + } - // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) - continue; + // Reject cases where it is pointless to do this. + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. + + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. 
+ if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } } } + } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast<BinaryOperator>(BI); + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast<BinaryOperator>(BI); - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) - continue; + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. + if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub) + return; - ReassociateExpression(I); - } + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast<Instruction>(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) { MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast<Instruction>(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. 
RankMap.clear(); diff --git a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp index 459bb06..47afc77 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Reg2Mem.cpp @@ -9,7 +9,7 @@ // // This file demotes all registers to memory references. It is intented to be // the inverse of PromoteMemoryToRegister. By converting to loads, the only -// values live accross basic blocks are allocas and loads before phi nodes. +// values live across basic blocks are allocas and loads before phi nodes. // It is intended that this should make CFG hacking much easier. // To make later hacking easier, the entry block is split into two, such that // all introduced allocas and nothing else are in the entry block. diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index c82e929..db8eb85 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) { ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals(); for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(), diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index bf9ca6d..32a0506 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -17,6 +17,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" @@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDSEPass(Registry); - initializeGEPSplitterPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeIndVarSimplifyPass(Registry); @@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeSimplifyHalfPowrLibCallsPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); initializeTailDupPass(Registry); @@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } @@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createScalarReplAggregatesPass()); } +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); @@ -159,3 +166,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void 
LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); } + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} diff --git a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca852..8178c27 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. 
/// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? 
+ if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. + if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. + if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType(); + const Type *InElementTy = cast<VectorType>(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa<ConstantInt>(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. 
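As an illustrative aside (not part of the patch): the new MergeInVectorType only unions vector accesses whose size is a power-of-2 fraction of the alloca, keeping the widest vector type seen so far. The standalone sketch below models just those size checks; canMergeVectorAccess, AllocaBytes and AccessBits are hypothetical names, and the explicit divisibility test is added here for clarity rather than copied from the real helper.

// Sketch only -- approximates the size checks in MergeInVectorType, not the LLVM API.
#include <cstdint>
#include <cstdio>

static bool isPowerOf2(uint64_t V) { return V && (V & (V - 1)) == 0; }

// AllocaBytes: size of the alloca; AccessBits: bit width of the vector access.
static bool canMergeVectorAccess(uint64_t AllocaBytes, uint64_t AccessBits,
                                 uint64_t OffsetBytes) {
  if (OffsetBytes != 0)                 // the real code has a TODO for nonzero offsets
    return false;
  uint64_t AccessBytes = AccessBits / 8;
  if (AccessBytes == 0 || AllocaBytes % AccessBytes != 0)
    return false;                       // explicit divisibility check, for clarity
  return isPowerOf2(AllocaBytes / AccessBytes);
}

int main() {
  std::printf("%d\n", (int)canMergeVectorAccess(16, 128, 0)); // 1: <4 x float>, same size as the alloca
  std::printf("%d\n", (int)canMergeVectorAccess(16, 64, 0));  // 1: <2 x float>, half the alloca
  std::printf("%d\n", (int)canMergeVectorAccess(16, 96, 0));  // 0: 12 bytes is not a power-of-2 fraction
  return 0;
}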
+ HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast<VectorType>(FromType); + const VectorType *ToVTy = cast<VectorType>(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector<Constant*, 3> Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. 
+ // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - // Must be an element insertion. - unsigned Elt = Offset/EltSize; + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - SV = Builder.CreateInsertElement(Old, SV, + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } + + // Must be an element insertion. 
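Another editorial sketch, not LLVM code: for a partial access that is a power-of-2 fraction of a vector alloca, the new extract/insert paths bitcast the alloca value to a vector whose element width equals the access width (getScaledElementType prefers float or double when floating point is involved) and then touch a single element at index Offset/EltSize. The snippet below reproduces only that arithmetic for the <2 x float> case described in the MergeInType comment; CastPlan and planPartialAccess are made-up names.

// Sketch only -- computes the cast vector shape and element index, not the real transform.
#include <cassert>
#include <cstdio>

struct CastPlan {
  unsigned ElemBits;   // width of the scaled element type
  unsigned NumElems;   // elements in the cast vector
  unsigned ElemIndex;  // which element to extract/insert
  bool     UseFP;      // prefer float/double when FP is involved
};

static CastPlan planPartialAccess(unsigned AllocaBytes, unsigned AccessBytes,
                                  unsigned OffsetBits, bool AnyFP) {
  CastPlan P;
  P.ElemBits  = AccessBytes * 8;              // one cast-vector element per access-sized chunk
  P.NumElems  = AllocaBytes / AccessBytes;
  P.UseFP     = AnyFP && (P.ElemBits == 32 || P.ElemBits == 64);
  assert(OffsetBits % P.ElemBits == 0 && "offset not aligned to an element");
  P.ElemIndex = OffsetBits / P.ElemBits;
  return P;
}

int main() {
  // A <2 x float> load from a 16-byte <4 x float> alloca at offset 0:
  // bitcast to <2 x double>, extract element 0, bitcast back to <2 x float>.
  CastPlan P = planPartialAccess(16, 8, 0, /*AnyFP=*/true);
  const char *Elem = P.UseFP ? (P.ElemBits == 64 ? "double" : "float") : "iN";
  std::printf("cast to <%u x %s>, element %u\n", P.NumElems, Elem, P.ElemIndex);
  return 0;
}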
+ assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + unsigned Elt = Offset/EltSize; + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. @@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ce5dd73..1137c2b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); - RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge", + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp deleted file mode 100644 index 70ff32e..0000000 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple pass that applies an experimental -// transformation on calls to specific functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "simplify-libcalls-halfpowr" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -using namespace llvm; - -namespace { - /// This pass optimizes well half_powr function calls. 
- /// - class SimplifyHalfPowrLibCalls : public FunctionPass { - const TargetData *TD; - public: - static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(ID) { - initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - Instruction * - InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt); - }; - char SimplifyHalfPowrLibCalls::ID = 0; -} // end anonymous namespace. - -INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", - "Simplify half_powr library calls", false, false) - -// Public interface to the Simplify HalfPowr LibCalls pass. -FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { - return new SimplifyHalfPowrLibCalls(); -} - -/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging -/// their control flow to better facilitate subsequent optimization. -Instruction * -SimplifyHalfPowrLibCalls:: -InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, - Instruction *InsertPt) { - std::vector<BasicBlock *> Bodies; - BasicBlock *NewBlock = 0; - - for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { - CallInst *Call = cast<CallInst>(HalfPowrs[i]); - Function *Callee = Call->getCalledFunction(); - - // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the kind of code we expect. If we're running this pass, we have - // reason to believe it will be what we expect. - Function::iterator I = Callee->begin(); - BasicBlock *Prologue = I++; - if (I == Callee->end()) break; - BasicBlock *SubnormalHandling = I++; - if (I == Callee->end()) break; - BasicBlock *Body = I++; - if (I != Callee->end()) break; - if (SubnormalHandling->getSinglePredecessor() != Prologue) - break; - BranchInst *PBI = dyn_cast<BranchInst>(Prologue->getTerminator()); - if (!PBI || !PBI->isConditional()) - break; - BranchInst *SNBI = dyn_cast<BranchInst>(SubnormalHandling->getTerminator()); - if (!SNBI || SNBI->isConditional()) - break; - if (!isa<ReturnInst>(Body->getTerminator())) - break; - - Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); - - // Inline the call, taking care of what code ends up where. - NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - - InlineFunctionInfo IFI(0, TD); - bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); - (void)B; - - BasicBlock *NewBody = NewBlock->getSinglePredecessor(); - assert(NewBody); - Bodies.push_back(NewBody); - } - - if (!NewBlock) - return InsertPt; - - // Put the code for all the bodies into one block, to facilitate - // subsequent optimization. - (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); - for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { - BasicBlock *Body = Bodies[i]; - Instruction *FNP = Body->getFirstNonPHI(); - // Splice the insts from body into NewBlock. - NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), - FNP, Body->getTerminator()); - } - - return NewBlock->begin(); -} - -/// runOnFunction - Top level algorithm. -/// -bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable<TargetData>(); - - bool Changed = false; - std::vector<Instruction *> HalfPowrs; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for calls. 
- bool IsHalfPowr = false; - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Look for direct calls and calls to non-external functions. - Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->hasExternalLinkage()) { - // Look for calls with well-known names. - if (Callee->getName() == "__half_powrf4") - IsHalfPowr = true; - } - } - if (IsHalfPowr) - HalfPowrs.push_back(I); - // We're looking for sequences of up to three such calls, which we'll - // simplify as a group. - if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { - I = InlineHalfPowrs(HalfPowrs, I); - E = I->getParent()->end(); - HalfPowrs.clear(); - Changed = true; - } - } - assert(HalfPowrs.empty() && "Block had no terminator!"); - } - - return Changed; -} diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9f136d4..6247b03 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -49,6 +49,7 @@ class LibCallOptimization { protected: Function *Caller; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext* Context; public: LibCallOptimization() { } @@ -62,9 +63,11 @@ public: virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; + this->TLI = TLI; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} /// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. @@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization { // 'printf' Optimizations struct PrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) @@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
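A brief aside on the new fallback in the printf-family optimizers: when the fixed-format-string rewrites do not fire, printf, sprintf and fprintf calls are redirected to the integer-only iprintf, siprintf and fiprintf variants, provided the target library declares them (the TLI->has(...) checks) and no call argument is floating point, which lets newlib-style C libraries avoid pulling in floating-point formatting code. The sketch below models only that decision; ArgKind, chooseCallee and TargetHasIPrintf are illustrative stand-ins, not LLVM API.

// Sketch only -- the rewrite-to-iprintf decision, with made-up types.
#include <string>
#include <vector>

enum class ArgKind { Int, Pointer, Float, Double };

static bool hasFloatingPointArgument(const std::vector<ArgKind> &Args) {
  for (ArgKind K : Args)
    if (K == ArgKind::Float || K == ArgKind::Double)
      return true;
  return false;
}

// TargetHasIPrintf stands in for TLI->has(LibFunc::iprintf) in the real pass.
static std::string chooseCallee(const std::vector<ArgKind> &Args,
                                bool TargetHasIPrintf) {
  if (TargetHasIPrintf && !hasFloatingPointArgument(Args))
    return "iprintf";
  return "printf";
}

int main() {
  std::vector<ArgKind> IntArgs = {ArgKind::Pointer, ArgKind::Int};
  std::vector<ArgKind> FPArgs  = {ArgKind::Pointer, ArgKind::Double};
  return (chooseCallee(IntArgs, true) == "iprintf" &&
          chooseCallee(FPArgs, true) == "printf") ? 0 : 1;
}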
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// // 'sprintf' Optimizations struct SPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization { // 'fprintf' Optimizations struct FPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed paramters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. 
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); + // Use debug location of CI for all new instructions. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Try to optimize this call. - Value *Result = LCO->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder); if (Result == 0) continue; DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5b6bc04..539cc6f 100644 --- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -36,7 +36,7 @@ // evaluated each time through the tail recursion. Safely keeping allocas // in the entry block requires analysis to proves that the tail-called // function does not read or write the stack object. -// 2. Tail recursion is only performed if the call immediately preceeds the +// 2. Tail recursion is only performed if the call immediately precedes the // return instruction. It's possible that there could be a jump between // the call and the return. // 3. There can be intervening operations between the call and the return that @@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed + // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = @@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { - PHINode *PN = PHINode::Create(I->getType(), + PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); @@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the @@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // other tail recursions eliminated) the accumulator is not modified. 
// Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. - for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); - PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); @@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. - BranchInst::Create(OldEntry, Ret); + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated;
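One last editorial note: several hunks above (the load-PHI rewrite in tryToMakeAllocaBePromotable, MergeEmptyReturnBlocks, and the two tail-recursion PHIs) now pass an expected incoming-value count to PHINode::Create, taken from getNumIncomingValues() or computed once with std::distance over the predecessor range. The point is to reserve operand space up front so that adding incoming edges does not repeatedly reallocate. The sketch below shows the same reserve-then-append pattern with a plain std::vector; buildPhi and Incoming are hypothetical names, not LLVM classes.

// Sketch only -- the reserve-then-append pattern behind the new
// PHINode::Create(Ty, NumReservedValues, ...) calls in this patch.
#include <iterator>
#include <list>
#include <vector>

struct Incoming { int Value; int PredBlock; };

// Preds stands in for the pred_begin()/pred_end() range of a basic block.
static std::vector<Incoming> buildPhi(const std::list<int> &Preds, int InVal) {
  std::vector<Incoming> Phi;
  // Count the predecessors once, as the patch does with
  // std::distance(pred_begin(BB), pred_end(BB)), and reserve space so that
  // appending incoming values never reallocates.
  Phi.reserve(std::distance(Preds.begin(), Preds.end()));
  for (int P : Preds)
    Phi.push_back({InVal, P});
  return Phi;
}

int main() {
  std::list<int> Preds = {1, 2, 3};
  return buildPhi(Preds, 42).size() == 3 ? 0 : 1;
}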