author    | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-05-27 15:15:58 +0000
commit    | 1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree      | 9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/Transforms
parent    | 377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
download  | FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.zip, FreeBSD-src-1e3dec662ea18131c495db50caccc57f77b7a5fe.tar.gz
Update LLVM to r104832.
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/IPO/DeadArgumentElimination.cpp   |  18
-rw-r--r-- | lib/Transforms/IPO/InlineAlways.cpp              |   3
-rw-r--r-- | lib/Transforms/IPO/InlineSimple.cpp              |   3
-rw-r--r-- | lib/Transforms/IPO/MergeFunctions.cpp            | 464
-rw-r--r-- | lib/Transforms/IPO/StripSymbols.cpp              |   6
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h         |   4
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp  |  72
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h |   2
-rw-r--r-- | lib/Transforms/Scalar/CMakeLists.txt             |   1
-rw-r--r-- | lib/Transforms/Scalar/GVN.cpp                    |   8
-rw-r--r-- | lib/Transforms/Scalar/LoopStrengthReduce.cpp     | 490
-rw-r--r-- | lib/Transforms/Scalar/SimplifyCFGPass.cpp        |  14
-rw-r--r-- | lib/Transforms/Scalar/SimplifyLibCalls.cpp       |  11
-rw-r--r-- | lib/Transforms/Scalar/Sink.cpp                   | 267
-rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp           |   2
-rw-r--r-- | lib/Transforms/Utils/PromoteMemoryToRegister.cpp |   4
-rw-r--r-- | lib/Transforms/Utils/SSAUpdater.cpp              | 531
17 files changed, 1141 insertions, 759 deletions
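
The summary above covers the range between the parent and commit hashes listed in the header. A minimal sketch of reproducing it locally, assuming a git clone of the FreeBSD-src repository that contains both revisions (the hashes are taken from the header; the clone path is hypothetical):

```sh
# Diffstat limited to lib/Transforms, equivalent to the table above.
git diff --stat 377552607e51dc1d3e6ff33833f9620bcfe815ac \
                1e3dec662ea18131c495db50caccc57f77b7a5fe -- lib/Transforms

# Full patch for the same path; the hunks below show this output.
git diff 377552607e51dc1d3e6ff33833f9620bcfe815ac \
         1e3dec662ea18131c495db50caccc57f77b7a5fe -- lib/Transforms
```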
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 6443dd4..692e47d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -535,14 +535,14 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); - // Mark the function as live. - LiveFunctions.insert(&F); - // Mark all arguments as live. - for (unsigned i = 0, e = F.arg_size(); i != e; ++i) - PropagateLiveness(CreateArg(&F, i)); - // Mark all return values as live. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - PropagateLiveness(CreateRet(&F, i)); + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); } /// MarkLive - Mark the given return value or argument as live. Additionally, @@ -859,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NFTy->getReturnType()->isVoidTy()) { RetVal = 0; } else { assert (RetTy->isStructTy()); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index bc8028c..8e312e7 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -54,6 +54,9 @@ namespace { return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 46cf4b2..74b4a1c 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -49,6 +49,9 @@ namespace { CA.growCachedCostInfo(Caller, Callee); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index b07e22c..622a9b5 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -17,32 +17,55 @@ // important that the hash function be high quality. The equality comparison // iterates through each instruction in each basic block. // -// When a match is found, the functions are folded. We can only fold two -// functions when we know that the definition of one of them is not -// overridable. +// When a match is found the functions are folded. If both functions are +// overridable, we move the functionality into a new internal function and +// leave two overridable thunks to it. // //===----------------------------------------------------------------------===// // // Future work: // -// * fold vector<T*>::push_back and vector<S*>::push_back. -// -// These two functions have different types, but in a way that doesn't matter -// to us. As long as we never see an S or T itself, using S* and S** is the -// same as using a T* and T**. -// // * virtual functions. // // Many functions have their address taken by the virtual function table for // the object they belong to. 
However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such implementations. // +// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// +// The current implementation loops over a pair-wise comparison of all +// functions in the program where the two functions in the pair are treated as +// assumed to be equal until proven otherwise. We could both use fewer +// comparisons and optimize more complex cases if we used strongly connected +// components of the call graph. +// +// * be smarter about bitcast. +// +// In order to fold functions, we will sometimes add either bitcast instructions +// or bitcast constant expressions. Unfortunately, this can confound further +// analysis since the two functions differ where one has a bitcast and the +// other doesn't. We should learn to peer through bitcasts without imposing bad +// performance properties. +// +// * don't emit aliases for Mach-O. +// +// Mach-O doesn't support aliases which means that we must avoid introducing +// them in the bitcode on architectures which don't support them, such as +// Mac OSX. There's a few approaches to this problem; +// a) teach codegen to lower global aliases to thunks on platforms which don't +// support them. +// b) always emit thunks, and create a separate thunk-to-alias pass which +// runs on ELF systems. This has the added benefit of transforming other +// thunks such as those produced by a C++ frontend into aliases when legal +// to do so. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -54,6 +77,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include <map> #include <vector> using namespace llvm; @@ -61,17 +85,33 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { - struct MergeFunctions : public ModulePass { + class MergeFunctions : public ModulePass { + public: static char ID; // Pass identification, replacement for typeid MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); + + private: + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2); + + bool equals(const BasicBlock *BB1, const BasicBlock *BB2); + bool equals(const Function *F, const Function *G); + + bool compare(const Value *V1, const Value *V2); + + const Function *LHS, *RHS; + typedef DenseMap<const Value *, unsigned long> IDMap; + IDMap Map; + DenseMap<const Function *, IDMap> Domains; + DenseMap<const Function *, unsigned long> DomainCount; + TargetData *TD; }; } char MergeFunctions::ID = 0; -static RegisterPass<MergeFunctions> -X("mergefunc", "Merge Functions"); +static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions"); ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); @@ -95,15 +135,6 @@ static unsigned long hash(const Function *F) { return ID.ComputeHash(); } -/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by -/// walking the chain of cast operands. Otherwise, returns the argument. 
-static Value* IgnoreBitcasts(Value *V) { - while (BitCastInst *BC = dyn_cast<BitCastInst>(V)) - V = BC->getOperand(0); - - return V; -} - /// isEquivalentType - any two pointers are equivalent. Otherwise, standard /// type equivalence rules apply. static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { @@ -113,6 +144,14 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return false; switch(Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release-Asserts mode. + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. + return false; + case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -123,15 +162,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { case Type::MetadataTyID: return true; - case Type::IntegerTyID: - case Type::OpaqueTyID: - // Ty1 == Ty2 would have returned true earlier. - return false; - - default: - llvm_unreachable("Unknown type!"); - return false; - case Type::PointerTyID: { const PointerType *PTy1 = cast<PointerType>(Ty1); const PointerType *PTy2 = cast<PointerType>(Ty2); @@ -154,6 +184,21 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return true; } + case Type::UnionTyID: { + const UnionType *UTy1 = cast<UnionType>(Ty1); + const UnionType *UTy2 = cast<UnionType>(Ty2); + + // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. + if (UTy1->getNumElements() != UTy2->getNumElements()) + return false; + + for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) + return false; + } + return true; + } + case Type::FunctionTyID: { const FunctionType *FTy1 = cast<FunctionType>(Ty1); const FunctionType *FTy2 = cast<FunctionType>(Ty2); @@ -236,123 +281,136 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) { return true; } -static bool compare(const Value *V, const Value *U) { - assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) && - "Must not compare basic blocks."); - - assert(isEquivalentType(V->getType(), U->getType()) && - "Two of the same operation have operands of different type."); +bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector<Value *, 8> Indices1, Indices2; + for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), + E = GEP1->idx_end(); I != E; ++I) { + Indices1.push_back(*I); + } + for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), + E = GEP2->idx_end(); I != E; ++I) { + Indices2.push_back(*I); + } + uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), + Indices1.data(), Indices1.size()); + uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), + Indices2.data(), Indices2.size()); + return Offset1 == Offset2; + } - // TODO: If the constant is an expression of F, we should accept that it's - // equal to the same expression in terms of G. - if (isa<Constant>(V)) - return V == U; + // Equivalent types aren't enough. + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; - // The caller has ensured that ValueMap[V] != U. Since Arguments are - // pre-loaded into the ValueMap, and Instructions are added as we go, we know - // that this can only be a mis-match. 
- if (isa<Instruction>(V) || isa<Argument>(V)) + if (GEP1->getNumOperands() != GEP2->getNumOperands()) return false; - if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) { - const InlineAsm *IAF = cast<InlineAsm>(V); - const InlineAsm *IAG = cast<InlineAsm>(U); - return IAF->getAsmString() == IAG->getAsmString() && - IAF->getConstraintString() == IAG->getConstraintString(); + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; } - return false; + return true; } -static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, - DenseMap<const Value *, const Value *> &ValueMap, - DenseMap<const Value *, const Value *> &SpeculationMap) { - // Speculatively add it anyways. If it's false, we'll notice a difference - // later, and this won't matter. - ValueMap[BB1] = BB2; +bool MergeFunctions::compare(const Value *V1, const Value *V2) { + if (V1 == LHS || V1 == RHS) + if (V2 == LHS || V2 == RHS) + return true; - BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); - BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); + // TODO: constant expressions in terms of LHS and RHS + if (isa<Constant>(V1)) + return V1 == V2; - do { - if (isa<BitCastInst>(FI)) { - ++FI; - continue; - } - if (isa<BitCastInst>(GI)) { - ++GI; - continue; - } + if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) { + const InlineAsm *IA1 = cast<InlineAsm>(V1); + const InlineAsm *IA2 = cast<InlineAsm>(V2); + return IA1->getAsmString() == IA2->getAsmString() && + IA1->getConstraintString() == IA2->getConstraintString(); + } - if (!isEquivalentOperation(FI, GI)) - return false; + // We enumerate constants globally and arguments, basic blocks or + // instructions within the function they belong to. + const Function *Domain1 = NULL; + if (const Argument *A = dyn_cast<Argument>(V1)) { + Domain1 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) { + Domain1 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V1)) { + Domain1 = I->getParent()->getParent(); + } - if (isa<GetElementPtrInst>(FI)) { - const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI); - const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI); - if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { - // It's effectively a bitcast. 
- ++FI, ++GI; - continue; - } + const Function *Domain2 = NULL; + if (const Argument *A = dyn_cast<Argument>(V2)) { + Domain2 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) { + Domain2 = BB->getParent(); + } else if (const Instruction *I = dyn_cast<Instruction>(V2)) { + Domain2 = I->getParent()->getParent(); + } - // TODO: we only really care about the elements before the index - if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + if (Domain1 != Domain2) + if (Domain1 != LHS && Domain1 != RHS) + if (Domain2 != LHS && Domain2 != RHS) return false; - } - if (ValueMap[FI] == GI) { - ++FI, ++GI; - continue; - } + IDMap &Map1 = Domains[Domain1]; + unsigned long &ID1 = Map1[V1]; + if (!ID1) + ID1 = ++DomainCount[Domain1]; - if (ValueMap[FI] != NULL) - return false; + IDMap &Map2 = Domains[Domain2]; + unsigned long &ID2 = Map2[V2]; + if (!ID2) + ID2 = ++DomainCount[Domain2]; - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = IgnoreBitcasts(FI->getOperand(i)); - Value *OpG = IgnoreBitcasts(GI->getOperand(i)); + return ID1 == ID2; +} - if (ValueMap[OpF] == OpG) - continue; +bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { + BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); + BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); - if (ValueMap[OpF] != NULL) + do { + if (!compare(FI, GI)) + return false; + + if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) { + const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI); + + if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) return false; - if (OpF->getValueID() != OpG->getValueID() || - !isEquivalentType(OpF->getType(), OpG->getType())) + if (!isEquivalentGEP(GEP1, GEP2)) + return false; + } else { + if (!isEquivalentOperation(FI, GI)) return false; - if (isa<PHINode>(FI)) { - if (SpeculationMap[OpF] == NULL) - SpeculationMap[OpF] = OpG; - else if (SpeculationMap[OpF] != OpG) - return false; - continue; - } else if (isa<BasicBlock>(OpF)) { - assert(isa<TerminatorInst>(FI) && - "BasicBlock referenced by non-Terminator non-PHI"); - // This call changes the ValueMap, hence we can't use - // Value *& = ValueMap[...] - if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap, - SpeculationMap)) - return false; - } else { + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + Value *OpF = FI->getOperand(i); + Value *OpG = GI->getOperand(i); + if (!compare(OpF, OpG)) return false; - } - ValueMap[OpF] = OpG; + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) + return false; + } } - ValueMap[FI] = GI; ++FI, ++GI; } while (FI != FE && GI != GE); return FI == FE && GI == GE; } -static bool equals(const Function *F, const Function *G) { +bool MergeFunctions::equals(const Function *F, const Function *G) { // We need to recheck everything, but check the things that weren't included // in the hash first. 
@@ -382,27 +440,46 @@ static bool equals(const Function *F, const Function *G) { if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; - DenseMap<const Value *, const Value *> ValueMap; - DenseMap<const Value *, const Value *> SpeculationMap; - ValueMap[F] = G; - assert(F->arg_size() == G->arg_size() && "Identical functions have a different number of args."); - for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), - fe = F->arg_end(); fi != fe; ++fi, ++gi) - ValueMap[fi] = gi; + LHS = F; + RHS = G; - if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap, - SpeculationMap)) - return false; + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) { + if (!compare(fi, gi)) + llvm_unreachable("Arguments repeat"); + } - for (DenseMap<const Value *, const Value *>::iterator - I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) { - if (ValueMap[I->first] != I->second) + SmallVector<const BasicBlock *, 8> FBBs, GBBs; + SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F. + FBBs.push_back(&F->getEntryBlock()); + GBBs.push_back(&G->getEntryBlock()); + VisitedBBs.insert(FBBs[0]); + while (!FBBs.empty()) { + const BasicBlock *FBB = FBBs.pop_back_val(); + const BasicBlock *GBB = GBBs.pop_back_val(); + if (!compare(FBB, GBB) || !equals(FBB, GBB)) { + Domains.clear(); + DomainCount.clear(); return false; + } + const TerminatorInst *FTI = FBB->getTerminator(); + const TerminatorInst *GTI = GBB->getTerminator(); + assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); + for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(FTI->getSuccessor(i))) + continue; + FBBs.push_back(FTI->getSuccessor(i)); + GBBs.push_back(GTI->getSuccessor(i)); + } } + Domains.clear(); + DomainCount.clear(); return true; } @@ -476,20 +553,32 @@ static LinkageCategory categorize(const Function *F) { } static void ThunkGToF(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) + TheIter.getUse().set(BitcastF); + } + } + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - std::vector<Value *> Args; + SmallVector<Value *, 16> Args; unsigned i = 0; const FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - if (FFTy->getParamType(i) == AI->getType()) + if (FFTy->getParamType(i) == AI->getType()) { Args.push_back(AI); - else { - Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); - Args.push_back(BCI); + } else { + Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); } ++i; } @@ -510,8 +599,6 @@ static void ThunkGToF(Function *F, Function *G) { NewG->takeName(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); - - // TODO: look at direct callers to G and make them all direct callers to F. 
} static void AliasGToF(Function *F, Function *G) { @@ -542,67 +629,66 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { } switch (catF) { + case ExternalStrong: + switch (catG) { case ExternalStrong: - switch (catG) { - case ExternalStrong: - case ExternalWeak: - ThunkGToF(F, G); - break; - case Internal: - if (G->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - } + case ExternalWeak: + ThunkGToF(F, G); break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; - case ExternalWeak: { - assert(catG == ExternalWeak); + case ExternalWeak: { + assert(catG == ExternalWeak); - // Make them both thunks to the same internal function. - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + // Make them both thunks to the same internal function. + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); - ThunkGToF(F, G); - ThunkGToF(F, H); + ThunkGToF(F, G); + ThunkGToF(F, H); - F->setLinkage(GlobalValue::InternalLinkage); - } break; + F->setLinkage(GlobalValue::InternalLinkage); + } break; - case Internal: - switch (catG) { - case ExternalStrong: - llvm_unreachable(0); - // fall-through - case ExternalWeak: - if (F->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - case Internal: { - bool addrTakenF = F->hasAddressTaken(); - bool addrTakenG = G->hasAddressTaken(); - if (!addrTakenF && addrTakenG) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + case Internal: + switch (catG) { + case ExternalStrong: + llvm_unreachable(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } - if (addrTakenF && addrTakenG) { - ThunkGToF(F, G); - } else { - assert(!addrTakenG); - AliasGToF(F, G); - } - } break; + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); } - break; + } break; + } break; } ++NumFunctionsMerged; @@ -619,22 +705,20 @@ bool MergeFunctions::runOnModule(Module &M) { std::map<unsigned long, std::vector<Function *> > FnMap; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration() || F->isIntrinsic()) + if (F->isDeclaration()) continue; FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in - // callgraph order. Constructing the CFG probably isn't cheaper than just - // running in a loop, unless it happened to already be available. 
+ TD = getAnalysisIfAvailable<TargetData>(); bool LocalChanged; do { LocalChanged = false; DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map<unsigned long, std::vector<Function *> >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { + I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector<Function *> &FnVec = I->second; DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 310e4a2..6bc8e66 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -229,6 +229,12 @@ static bool StripDebugInfo(Module &M) { NMD->eraseFromParent(); } + NMD = M.getNamedMetadata("llvm.dbg.lv"); + if (NMD) { + Changed = true; + NMD->eraseFromParent(); + } + unsigned MDDbgKind = M.getMDKindID("dbg"); for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index bd06499..c7b04a4 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -51,7 +51,7 @@ static inline unsigned getComplexity(Value *V) { /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class VISIBILITY_HIDDEN InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: @@ -65,7 +65,7 @@ public: }; /// InstCombiner - The -instcombine pass. -class VISIBILITY_HIDDEN InstCombiner +class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index eb7628e..b0137c4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -442,7 +442,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid cast: " << CI); + " to avoid cast: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); return ReplaceInstUsesWith(CI, Res); @@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return commonPointerCastTransforms(CI); } +/// OptimizeVectorResize - This input value (which is known to have vector type) +/// is being zero extended or truncated to the specified vector type. Try to +/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// +/// The source and destination vector types may have different element types. +static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, + InstCombiner &IC) { + // We can only do this optimization if the output is a multiple of the input + // element size, or the input is a multiple of the output element size. + // Convert the input type to have the same element type as the output. 
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType()); + + if (SrcTy->getElementType() != DestTy->getElementType()) { + // The input types don't need to be identical, but for now they must be the + // same size. There is no specific reason we couldn't handle things like + // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten + // there yet. + if (SrcTy->getElementType()->getPrimitiveSizeInBits() != + DestTy->getElementType()->getPrimitiveSizeInBits()) + return 0; + + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); + InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + } + + // Now that the element types match, get the shuffle mask and RHS of the + // shuffle to use, which depends on whether we're increasing or decreasing the + // size of the input. + SmallVector<Constant*, 16> ShuffleMask; + Value *V2; + const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); + + if (SrcTy->getNumElements() > DestTy->getNumElements()) { + // If we're shrinking the number of elements, just shuffle in the low + // elements from the input and use undef as the second shuffle input. + V2 = UndefValue::get(SrcTy); + for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + } else { + // If we're increasing the number of elements, shuffle in all of the + // elements from InVal and fill the rest of the result elements with zeros + // from a constant zero. + V2 = Constant::getNullValue(SrcTy); + unsigned SrcElts = SrcTy->getNumElements(); + for (unsigned i = 0, e = SrcElts; i != e; ++i) + ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + + // The excess elements reference the first element of the zero input. + ShuffleMask.append(DestTy->getNumElements()-SrcElts, + ConstantInt::get(Int32Ty, SrcElts)); + } + + Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size()); + return new ShuffleVectorInst(InVal, V2, Mask); +} + + Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, // otherwise just apply the common ones. @@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } + + // If this is a cast from an integer to vector, check to see if the input + // is a trunc or zext of a bitcast from vector. If so, we can replace all + // the casts with a shuffle and (potentially) a bitcast. + if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){ + CastInst *SrcCast = cast<CastInst>(Src); + if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0))) + if (isa<VectorType>(BCIn->getOperand(0)->getType())) + if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0), + cast<VectorType>(DestTy), *this)) + return I; + } } if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9d88621..9100a85 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -22,7 +22,7 @@ namespace llvm { /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. 
-class VISIBILITY_HIDDEN InstCombineWorklist { +class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 5778864..1a3b10c 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_library(LLVMScalarOpts SimplifyCFGPass.cpp SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp + Sink.cpp TailDuplication.cpp TailRecursionElimination.cpp ) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 65b34b1..ca8ab49 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -868,7 +868,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, const Type *StoredValTy = StoredVal->getType(); - uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. @@ -1132,8 +1132,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; - uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; + uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); @@ -1604,7 +1604,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } if (!NeedToSplit.empty()) { - toSplit.append(NeedToSplit.size(), NeedToSplit.front()); + toSplit.append(NeedToSplit.begin(), NeedToSplit.end()); return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index cf3d16f..86ea3eb 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -107,11 +107,13 @@ namespace { class RegUseTracker { typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - RegUsesTy RegUses; + RegUsesTy RegUsesMap; SmallVector<const SCEV *, 16> RegSequence; public: void CountRegister(const SCEV *Reg, size_t LUIdx); + void DropRegister(const SCEV *Reg, size_t LUIdx); + void DropUse(size_t LUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -132,7 +134,7 @@ public: void RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { std::pair<RegUsesTy::iterator, bool> Pair = - RegUses.insert(std::make_pair(Reg, RegSortData())); + RegUsesMap.insert(std::make_pair(Reg, RegSortData())); RegSortData &RSD = Pair.first->second; if (Pair.second) RegSequence.push_back(Reg); @@ -140,11 +142,28 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { RSD.UsedByIndices.set(LUIdx); } +void +RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { + RegUsesTy::iterator It = RegUsesMap.find(Reg); + assert(It != RegUsesMap.end()); + RegSortData &RSD = It->second; + assert(RSD.UsedByIndices.size() > LUIdx); + RSD.UsedByIndices.reset(LUIdx); +} + +void +RegUseTracker::DropUse(size_t LUIdx) { + // Remove the use index from every register's use list. 
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end(); + I != E; ++I) + I->second.UsedByIndices.reset(LUIdx); +} + bool RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { - if (!RegUses.count(Reg)) return false; + if (!RegUsesMap.count(Reg)) return false; const SmallBitVector &UsedByIndices = - RegUses.find(Reg)->second.UsedByIndices; + RegUsesMap.find(Reg)->second.UsedByIndices; int i = UsedByIndices.find_first(); if (i == -1) return false; if ((size_t)i != LUIdx) return true; @@ -152,13 +171,13 @@ RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { } const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { - RegUsesTy::const_iterator I = RegUses.find(Reg); - assert(I != RegUses.end() && "Unknown register!"); + RegUsesTy::const_iterator I = RegUsesMap.find(Reg); + assert(I != RegUsesMap.end() && "Unknown register!"); return I->second.UsedByIndices; } void RegUseTracker::clear() { - RegUses.clear(); + RegUsesMap.clear(); RegSequence.clear(); } @@ -188,6 +207,8 @@ struct Formula { unsigned getNumRegs() const; const Type *getType() const; + void DeleteBaseReg(const SCEV *&S); + bool referencesReg(const SCEV *S) const; bool hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const; @@ -291,6 +312,13 @@ const Type *Formula::getType() const { 0; } +/// DeleteBaseReg - Delete the given base reg from the BaseRegs list. +void Formula::DeleteBaseReg(const SCEV *&S) { + if (&S != &BaseRegs.back()) + std::swap(S, BaseRegs.back()); + BaseRegs.pop_back(); +} + /// referencesReg - Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { return S == ScaledReg || @@ -326,6 +354,13 @@ void Formula::print(raw_ostream &OS) const { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } + if (AM.HasBaseReg && BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: HasBaseReg**"; + } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + if (!First) OS << " + "; else First = false; + OS << "**error: !HasBaseReg**"; + } if (AM.Scale != 0) { if (!First) OS << " + "; else First = false; OS << AM.Scale << "*reg("; @@ -345,8 +380,7 @@ void Formula::dump() const { /// without changing its value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(AR->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); } @@ -354,8 +388,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { /// without changing its value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -363,8 +396,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { /// without changing its value. 
static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { const Type *WideTy = - IntegerType::get(SE.getContext(), - SE.getTypeSizeInBits(A->getType()) + 1); + IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); } @@ -432,14 +464,14 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, bool Found = false; for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); I != E; ++I) { + const SCEV *S = *I; if (!Found) - if (const SCEV *Q = getExactSDiv(*I, RHS, SE, + if (const SCEV *Q = getExactSDiv(S, RHS, SE, IgnoreSignificantBits)) { - Ops.push_back(Q); + S = Q; Found = true; - continue; } - Ops.push_back(*I); + Ops.push_back(S); } return Found ? SE.getMulExpr(Ops) : 0; } @@ -810,8 +842,7 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), - LUIdx(~size_t(0)), Offset(0) {} + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. @@ -934,7 +965,10 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true) {} + bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); + void DeleteFormula(Formula &F); + void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); void check() const; @@ -942,6 +976,16 @@ public: void dump() const; }; +/// HasFormula - Test whether this use as a formula which has the same +/// registers as the given formula. +bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { + SmallVector<const SCEV *, 2> Key = F.BaseRegs; + if (F.ScaledReg) Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for uniquifying. + std::sort(Key.begin(), Key.end()); + return Uniquifier.count(Key); +} + /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { @@ -972,6 +1016,33 @@ bool LSRUse::InsertFormula(const Formula &F) { return true; } +/// DeleteFormula - Remove the given formula from this use's list. +void LSRUse::DeleteFormula(Formula &F) { + if (&F != &Formulae.back()) + std::swap(F, Formulae.back()); + Formulae.pop_back(); + assert(!Formulae.empty() && "LSRUse has no formulae left!"); +} + +/// RecomputeRegs - Recompute the Regs field, and update RegUses. +void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { + // Now that we've filtered out some formulae, recompute the Regs set. + SmallPtrSet<const SCEV *, 4> OldRegs = Regs; + Regs.clear(); + for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(), + E = Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + } + + // Update the RegTracker. + for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(), + E = OldRegs.end(); I != E; ++I) + if (!Regs.count(*I)) + RegUses.DropRegister(*I, LUIdx); +} + void LSRUse::print(raw_ostream &OS) const { OS << "LSR Use: Kind="; switch (Kind) { @@ -1091,6 +1162,13 @@ static bool isAlwaysFoldable(int64_t BaseOffs, AM.HasBaseReg = HasBaseReg; AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + // Canonicalize a scale of 1 to a base register if the formula doesn't + // already have a base register. 
+ if (!AM.HasBaseReg && AM.Scale == 1) { + AM.Scale = 0; + AM.HasBaseReg = true; + } + return isLegalUse(AM, Kind, AccessTy, TLI); } @@ -1186,7 +1264,7 @@ class LSRInstance { void OptimizeShadowIV(); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); - bool OptimizeLoopTermCond(); + void OptimizeLoopTermCond(); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1200,13 +1278,17 @@ class LSRInstance { typedef DenseMap<const SCEV *, size_t> UseMapTy; UseMapTy UseMap; - bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy); std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind, const Type *AccessTy); + void DeleteUse(LSRUse &LU); + + LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); + public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); @@ -1227,6 +1309,8 @@ public: void GenerateAllReuseFormulae(); void FilterOutUndesirableDedicatedRegisters(); + + size_t EstimateSearchSpaceComplexity() const; void NarrowSearchSpaceUsingHeuristics(); void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, @@ -1375,6 +1459,7 @@ void LSRInstance::OptimizeShadowIV() { /* Remove cast operation */ ShadowUse->replaceAllUsesWith(NewPH); ShadowUse->eraseFromParent(); + Changed = true; break; } } @@ -1382,8 +1467,7 @@ void LSRInstance::OptimizeShadowIV() { /// FindIVUserForCond - If Cond has an operand that is an expression of an IV, /// set the IV user and stride information and return true, otherwise return /// false. -bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse) { +bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) if (UI->getUser() == Cond) { // NOTE: we could handle setcc instructions with multiple uses here, but @@ -1555,7 +1639,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { /// OptimizeLoopTermCond - Change loop terminating condition to use the /// postinc iv when possible. -bool +void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet<Instruction *, 4> PostIncs; @@ -1621,13 +1705,13 @@ LSRInstance::OptimizeLoopTermCond() { } if (const SCEVConstant *D = dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { + const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (D->getValue()->isOne() || - D->getValue()->isAllOnesValue()) + if (C->isOne() || C->isAllOnesValue()) goto decline_post_inc; // Avoid weird situations. - if (D->getValue()->getValue().getMinSignedBits() >= 64 || - D->getValue()->getValue().isMinSignedValue()) + if (C->getValue().getMinSignedBits() >= 64 || + C->getValue().isMinSignedValue()) goto decline_post_inc; // Without TLI, assume that any stride might be valid, and so any // use might be shared. @@ -1636,7 +1720,7 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
const Type *AccessTy = getAccessType(UI->getUser()); TargetLowering::AddrMode AM; - AM.Scale = D->getValue()->getSExtValue(); + AM.Scale = C->getSExtValue(); if (TLI->isLegalAddressingMode(AM, AccessTy)) goto decline_post_inc; AM.Scale = -AM.Scale; @@ -1691,12 +1775,13 @@ LSRInstance::OptimizeLoopTermCond() { else if (BB != IVIncInsertPos->getParent()) IVIncInsertPos = BB->getTerminator(); } - - return Changed; } +/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// at the given offset and other details. If so, update the use and +/// return true. bool -LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, LSRUse::KindType Kind, const Type *AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; @@ -1709,12 +1794,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, return false; // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true, + if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true, + if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, Kind, AccessTy, TLI)) return false; NewMaxOffset = NewOffset; @@ -1753,7 +1838,7 @@ LSRInstance::getUse(const SCEV *&Expr, // A use already existed with this base. size_t LUIdx = P.first->second; LSRUse &LU = Uses[LUIdx]; - if (reconcileNewOffset(LU, Offset, Kind, AccessTy)) + if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) // Reuse this use. return std::make_pair(LUIdx, Offset); } @@ -1774,6 +1859,47 @@ LSRInstance::getUse(const SCEV *&Expr, return std::make_pair(LUIdx, Offset); } +/// DeleteUse - Delete the given use from the Uses list. +void LSRInstance::DeleteUse(LSRUse &LU) { + if (&LU != &Uses.back()) + std::swap(LU, Uses.back()); + Uses.pop_back(); +} + +/// FindUseWithFormula - Look for a use distinct from OrigLU which is has +/// a formula that has the same registers as the given formula. +LSRUse * +LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, + const LSRUse &OrigLU) { + // Search all uses for the formula. This could be more clever. Ignore + // ICmpZero uses because they may contain formulae generated by + // GenerateICmpZeroScales, in which case adding fixup offsets may + // be invalid. 
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + if (&LU != &OrigLU && + LU.Kind != LSRUse::ICmpZero && + LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && + LU.HasFormulaWithSameRegs(OrigF)) { + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseRegs == OrigF.BaseRegs && + F.ScaledReg == OrigF.ScaledReg && + F.AM.BaseGV == OrigF.AM.BaseGV && + F.AM.Scale == OrigF.AM.Scale && + LU.Kind) { + if (F.AM.BaseOffs == 0) + return &LU; + break; + } + } + } + } + + return 0; +} + void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; @@ -1867,6 +1993,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (NV == LF.OperandValToReplace) { CI->setOperand(1, CI->getOperand(0)); CI->setOperand(0, NV); + NV = CI->getOperand(1); + Changed = true; } // x == y --> x - y == 0 @@ -1901,6 +2029,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { DEBUG(print_fixups(dbgs())); } +/// InsertInitialFormula - Insert a formula for the given expression into +/// the given use, separating out loop-variant portions from loop-invariant +/// and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; @@ -1909,6 +2040,8 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { assert(Inserted && "Initial formula already exists!"); (void)Inserted; } +/// InsertSupplementalFormula - Insert a simple single-register formula for +/// the given expression into the given use. void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { @@ -2265,8 +2398,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, /// GenerateScales - Generate stride factor reuse formulae by making use of /// scaled-offset address modes, for example. -void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. const Type *IntTy = Base.getType(); if (!IntTy) return; @@ -2312,8 +2444,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, // TODO: This could be optimized to avoid all the copying. Formula F = Base; F.ScaledReg = Quotient; - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); + F.DeleteBaseReg(F.BaseRegs[i]); (void)InsertFormula(LU, LUIdx, F); } } @@ -2321,8 +2452,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, } /// GenerateTruncates - Generate reuse formulae from different IV types. -void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, - Formula Base) { +void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { // This requires TargetLowering to tell us which truncates are free. if (!TLI) return; @@ -2479,7 +2609,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - Formula F = LU.Formulae[L]; + const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. 
if (F.ScaledReg == OrigReg) { int64_t Offs = (uint64_t)F.AM.BaseOffs + @@ -2527,10 +2657,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) - if (C->getValue()->getValue().isNegative() != - (NewF.AM.BaseOffs < 0) && - C->getValue()->getValue().abs() - .ule(abs64(NewF.AM.BaseOffs))) + if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt( + abs64(NewF.AM.BaseOffs)) && + (C->getValue()->getValue() + + NewF.AM.BaseOffs).countTrailingZeros() >= + CountTrailingZeros_64(NewF.AM.BaseOffs)) goto skip_formula; // Ok, looks good. @@ -2579,7 +2710,7 @@ LSRInstance::GenerateAllReuseFormulae() { /// by other uses, pick the best one and delete the others. void LSRInstance::FilterOutUndesirableDedicatedRegisters() { #ifndef NDEBUG - bool Changed = false; + bool ChangedFormulae = false; #endif // Collect the best formula for each unique set of shared registers. This @@ -2591,10 +2722,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; FormulaSorter Sorter(L, LU, SE, DT); + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); - // Clear out the set of used regs; it will be recomputed. - LU.Regs.clear(); - + bool Any = false; for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; ++FIdx) { Formula &F = LU.Formulae[FIdx]; @@ -2619,62 +2749,200 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Formula &Best = LU.Formulae[P.first->second]; if (Sorter.operator()(F, Best)) std::swap(F, Best); - DEBUG(dbgs() << "Filtering out "; F.print(dbgs()); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" - " in favor of "; Best.print(dbgs()); + " in favor of formula "; Best.print(dbgs()); dbgs() << '\n'); #ifndef NDEBUG - Changed = true; + ChangedFormulae = true; #endif - std::swap(F, LU.Formulae.back()); - LU.Formulae.pop_back(); + LU.DeleteFormula(F); --FIdx; --NumForms; + Any = true; continue; } - if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); - LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + + // Now that we've filtered out some formulae, recompute the Regs set. + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. BestFormulae.clear(); } - DEBUG(if (Changed) { + DEBUG(if (ChangedFormulae) { dbgs() << "\n" "After filtering out undesirable candidates:\n"; print_uses(dbgs()); }); } +// This is a rough guess that seems to work fairly well. +static const size_t ComplexityLimit = UINT16_MAX; + +/// EstimateSearchSpaceComplexity - Estimate the worst-case number of +/// solutions the solver might have to consider. It almost never considers +/// this many solutions because it prune the search space, but the pruning +/// isn't always sufficient. +size_t LSRInstance::EstimateSearchSpaceComplexity() const { + uint32_t Power = 1; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + size_t FSize = I->Formulae.size(); + if (FSize >= ComplexityLimit) { + Power = ComplexityLimit; + break; + } + Power *= FSize; + if (Power >= ComplexityLimit) + break; + } + return Power; +} + /// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of /// formulae to choose from, use some rough heuristics to prune down the number /// of formulae. 
This keeps the main solver from taking an extraordinary amount /// of time in some worst-case scenarios. void LSRInstance::NarrowSearchSpaceUsingHeuristics() { - // This is a rough guess that seems to work fairly well. - const size_t Limit = UINT16_MAX; + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); - SmallPtrSet<const SCEV *, 4> Taken; - for (;;) { - // Estimate the worst-case number of solutions we might consider. We almost - // never consider this many solutions because we prune the search space, - // but the pruning isn't always sufficient. - uint32_t Power = 1; - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - size_t FSize = I->Formulae.size(); - if (FSize >= Limit) { - Power = Limit; - break; + DEBUG(dbgs() << "Narrowing the search space by eliminating formulae " + "which use a superset of registers used by other " + "formulae.\n"); + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + bool Any = false; + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + // Look for a formula with a constant or GV in a register. If the use + // also has a formula with that same value in an immediate field, + // delete the one that uses a register. + for (SmallVectorImpl<const SCEV *>::const_iterator + I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { + Formula NewF = F; + NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) + if (!F.AM.BaseGV) { + Formula NewF = F; + NewF.AM.BaseGV = GV; + NewF.BaseRegs.erase(NewF.BaseRegs.begin() + + (I - F.BaseRegs.begin())); + if (LU.HasFormulaWithSameRegs(NewF)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LU.DeleteFormula(F); + --i; + --e; + Any = true; + break; + } + } + } + } } - Power *= FSize; - if (Power >= Limit) - break; + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); } - if (Power < Limit) - break; + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } + + if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { + DEBUG(dbgs() << "The search space is too complex.\n"); + + DEBUG(dbgs() << "Narrowing the search space by assuming that uses " + "separated by a constant offset will use the same " + "registers.\n"); + + // This is especially useful for unrolled loops. + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { + if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { + if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, + /*HasBaseReg=*/false, + LU.Kind, LU.AccessTy)) { + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); + dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Delete formulae from the new use which are no longer legal. 
+ bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(F.AM, + LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Update the relocs to reference the new use. + for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.AM.BaseOffs; + DEBUG(errs() << "New fixup has offset " + << Fixup.Offset << '\n'); + } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; + } + + // Delete the old use. + DeleteUse(LU); + --LUIdx; + --NumUses; + break; + } + } + } + } + } + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); + } + + // With all other options exhausted, loop until the system is simple + // enough to handle. + SmallPtrSet<const SCEV *, 4> Taken; + while (EstimateSearchSpaceComplexity() >= ComplexityLimit) { // Ok, we have too many of formulae on our hands to conveniently handle. // Use a rough heuristic to thin out the list. + DEBUG(dbgs() << "The search space is too complex.\n"); // Pick the register which is used by the most LSRUses, which is likely // to be a good reuse register candidate. @@ -2702,28 +2970,26 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { // In any use with formulae which references this register, delete formulae // which don't reference it. - for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - LSRUse &LU = *I; + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; if (!LU.Regs.count(Best)) continue; - // Clear out the set of used regs; it will be recomputed. - LU.Regs.clear(); - + bool Any = false; for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { Formula &F = LU.Formulae[i]; if (!F.referencesReg(Best)) { DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); - std::swap(LU.Formulae.back(), F); - LU.Formulae.pop_back(); + LU.DeleteFormula(F); --e; --i; + Any = true; + assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?"); continue; } - - if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); - LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); } DEBUG(dbgs() << "After pre-selection:\n"; @@ -2810,11 +3076,14 @@ retry: // If none of the formulae had all of the required registers, relax the // constraint so that we don't exclude all formulae. if (!AnySatisfiedReqRegs) { + assert(!ReqRegs.empty() && "Solver failed even without required registers"); ReqRegs.clear(); goto retry; } } +/// Solve - Choose one formula from each use. Return the results in the given +/// Solution vector. void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { SmallVector<const Formula *, 8> Workspace; Cost SolutionCost; @@ -2824,6 +3093,7 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { DenseSet<const SCEV *> VisitedRegs; Workspace.reserve(Uses.size()); + // SolveRecurse does all the work. 
SolveRecurse(Solution, SolutionCost, Workspace, CurCost, CurRegs, VisitedRegs); @@ -2839,17 +3109,8 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { Solution[i]->print(dbgs()); dbgs() << '\n'; }); -} -/// getImmediateDominator - A handy utility for the specific DominatorTree -/// query that we need here. -/// -static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { - DomTreeNode *Node = DT.getNode(BB); - if (!Node) return 0; - Node = Node->getIDom(); - if (!Node) return 0; - return Node->getBlock(); + assert(Solution.size() == Uses.size() && "Malformed solution!"); } /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up @@ -2865,9 +3126,11 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; BasicBlock *IDom; - for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) { - IDom = getImmediateDominator(Rung, DT); - if (!IDom) return IP; + for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { + if (!Rung) return IP; + Rung = Rung->getIDom(); + if (!Rung) return IP; + IDom = Rung->getBlock(); // Don't climb into a loop though. const Loop *IDomLoop = LI.getLoopFor(IDom); @@ -2891,7 +3154,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) - BetterPos = next(BasicBlock::iterator(Inst)); + BetterPos = llvm::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; @@ -2957,6 +3220,8 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP, return IP; } +/// Expand - Emit instructions for the leading candidate expression for this +/// LSRUse (this is called "expanding"). Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -3212,6 +3477,8 @@ void LSRInstance::Rewrite(const LSRFixup &LF, DeadInsts.push_back(LF.OperandValToReplace); } +/// ImplementSolution - Rewrite all the fixup locations with new values, +/// following the chosen solution. void LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P) { @@ -3224,10 +3491,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Rewriter.setIVIncInsertPos(L, IVIncInsertPos); // Expand the new value definitions and update the users. - for (size_t i = 0, e = Fixups.size(); i != e; ++i) { - size_t LUIdx = Fixups[i].LUIdx; + for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + const LSRFixup &Fixup = *I; - Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P); + Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); Changed = true; } @@ -3256,13 +3524,11 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); dbgs() << ":\n"); - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast operation. + // First, perform some low-level loop optimizations. OptimizeShadowIV(); + OptimizeLoopTermCond(); - // Change loop terminating condition to use the postinc iv when possible. - Changed |= OptimizeLoopTermCond(); - + // Start collecting data and preparing for the solver. 
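Looking back at the HoistInsertPosition hunk above: the ad-hoc getImmediateDominator helper is gone, and the code now climbs the dominator tree directly through DomTreeNode. In isolation, that walking pattern looks roughly like the following sketch; the predicate and function name are made up for illustration.

#include "llvm/Analysis/Dominators.h"
using namespace llvm;

// Walk up the immediate-dominator chain starting at BB and return the first
// block that satisfies Pred, or null if none does. This mirrors the loop
// structure now used by HoistInsertPosition.
static BasicBlock *climbDominators(BasicBlock *BB, DominatorTree &DT,
                                   bool (*Pred)(BasicBlock *)) {
  for (DomTreeNode *Node = DT.getNode(BB); Node; Node = Node->getIDom()) {
    BasicBlock *Candidate = Node->getBlock();
    if (Pred(Candidate))
      return Candidate;
  }
  return 0;
}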
CollectInterestingTypesAndFactors(); CollectFixupsAndInitialFormulae(); CollectLoopInvariantFixupsAndFormulae(); @@ -3283,7 +3549,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) SmallVector<const Formula *, 8> Solution; Solve(Solution); - assert(Solution.size() == Uses.size() && "Malformed solution!"); // Release memory that is no longer needed. Factors.clear(); @@ -3333,9 +3598,8 @@ void LSRInstance::print_fixups(raw_ostream &OS) const { OS << "LSR is examining the following fixup sites:\n"; for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), E = Fixups.end(); I != E; ++I) { - const LSRFixup &LF = *I; dbgs() << " "; - LF.print(OS); + I->print(OS); OS << '\n'; } } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b621e8d..9744100 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -58,13 +58,20 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I) { +static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); + // Insert a call to llvm.trap right before this. This turns the undefined + // behavior into a hard fail instead of falling through into random code. + if (UseLLVMTrap) { + Function *TrapFn = + Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); + CallInst::Create(TrapFn, "", I); + } new UnreachableInst(I->getContext(), I); // All instructions after this are dead. @@ -118,7 +125,8 @@ static bool MarkAliveBlocks(BasicBlock *BB, // though. ++BBI; if (!isa<UnreachableInst>(BBI)) { - ChangeToUnreachable(BBI); + // Don't insert a call to llvm.trap right before the unreachable. 
+ ChangeToUnreachable(BBI, false); Changed = true; } break; @@ -134,7 +142,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI); + ChangeToUnreachable(SI, true); Changed = true; break; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index b053cfc..7414be7 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -558,10 +558,13 @@ struct MemCmpOpt : public LibCallOptimization { if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); - if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS - Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); - Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv"); - return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType()); + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); } // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp new file mode 100644 index 0000000..b88ba48 --- /dev/null +++ b/lib/Transforms/Scalar/Sink.cpp @@ -0,0 +1,267 @@ +//===-- Sink.cpp - Code Sinking -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. 
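Returning briefly to the MemCmpOpt change above: C's memcmp compares bytes as unsigned char, so the one-byte expansion has to zero-extend each byte before subtracting in the wider type; subtracting in i8 and then sign-extending can produce the wrong sign. A small illustrative sketch (not the pass's code):

#include <cstdint>

// memcmp compares bytes as unsigned char. With s1[0] == 0x01 and s2[0] == 0xFF,
// the result must be negative (1 < 255).
int new_one_byte_memcmp(const unsigned char *s1, const unsigned char *s2) {
  // New lowering: zero-extend each byte, then subtract in the wide type.
  return (int)(uint32_t)s1[0] - (int)(uint32_t)s2[0]; // 1 - 255 == -254, correct
}

int old_one_byte_memcmp(const unsigned char *s1, const unsigned char *s2) {
  // Old lowering: subtract in i8, then sign-extend the 8-bit difference.
  int8_t diff = (int8_t)(s1[0] - s2[0]); // 0x01 - 0xFF wraps to 0x02 in i8
  return (int)diff;                      // +2, wrong sign
}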
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sink" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumSunk, "Number of instructions sunk"); + +namespace { + class Sinking : public FunctionPass { + DominatorTree *DT; + LoopInfo *LI; + AliasAnalysis *AA; + + public: + static char ID; // Pass identification + Sinking() : FunctionPass(&ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.addPreserved<DominatorTree>(); + AU.addPreserved<LoopInfo>(); + } + private: + bool ProcessBlock(BasicBlock &BB); + bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores); + bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const; + }; +} // end anonymous namespace + +char Sinking::ID = 0; +static RegisterPass<Sinking> +X("sink", "Code sinking"); + +FunctionPass *llvm::createSinkingPass() { return new Sinking(); } + +/// AllUsesDominatedByBlock - Return true if all uses of the specified value +/// occur in blocks dominated by the specified block. +bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, + BasicBlock *BB) const { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (Value::use_iterator I = Inst->use_begin(), + E = Inst->use_end(); I != E; ++I) { + // Determine the block of the use. + Instruction *UseInst = cast<Instruction>(*I); + BasicBlock *UseBlock = UseInst->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(UseInst)) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo()); + UseBlock = PN->getIncomingBlock(Num); + } + // Check that it dominates. + if (!DT->dominates(BB, UseBlock)) + return false; + } + return true; +} + +bool Sinking::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + AA = &getAnalysis<AliasAnalysis>(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + for (Function::iterator I = F.begin(), E = F.end(); + I != E; ++I) + MadeChange |= ProcessBlock(*I); + + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + return EverMadeChange; +} + +bool Sinking::ProcessBlock(BasicBlock &BB) { + // Can't sink anything out of a block that has less than two successors. + if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an unreachable + // loop there may be nowhere to stop. 
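Before the implementation continues below, a source-level before/after sketch of what the pass aims for (plain illustrative C++, not LLVM IR): an instruction whose result is only needed on one side of a branch gets moved into that successor, so the other path never computes it.

// Before sinking: the multiply executes even when cond is false,
// although its result is only needed on the true path.
int before(int a, int b, bool cond) {
  int x = a * b;        // candidate for sinking: only used under 'cond'
  if (cond)
    return x + 1;
  return 0;
}

// After sinking: the multiply has moved into the block that uses it,
// so the 'cond == false' path no longer pays for it.
int after(int a, int b, bool cond) {
  if (cond) {
    int x = a * b;      // sunk into the successor containing all uses
    return x + 1;
  }
  return 0;
}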
+ if (!DT->isReachableFromEntry(&BB)) return false; + + bool MadeChange = false; + + // Walk the basic block bottom-up. Remember if we saw a store. + BasicBlock::iterator I = BB.end(); + --I; + bool ProcessedBegin = false; + SmallPtrSet<Instruction *, 8> Stores; + do { + Instruction *Inst = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == BB.begin(); + if (!ProcessedBegin) + --I; + + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + if (SinkInstruction(Inst, Stores)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, + SmallPtrSet<Instruction *, 8> &Stores) { + if (LoadInst *L = dyn_cast<LoadInst>(Inst)) { + if (L->isVolatile()) return false; + + Value *Ptr = L->getPointerOperand(); + unsigned Size = AA->getTypeStoreSize(L->getType()); + for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(), + E = Stores.end(); I != E; ++I) + if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod) + return false; + } + + if (Inst->mayWriteToMemory()) { + Stores.insert(Inst); + return false; + } + + return Inst->isSafeToSpeculativelyExecute(); +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool Sinking::SinkInstruction(Instruction *Inst, + SmallPtrSet<Instruction *, 8> &Stores) { + // Check if it's safe to move the instruction. + if (!isSafeToMove(Inst, AA, Stores)) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. + BasicBlock *ParentBlock = Inst->getParent(); + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + BasicBlock *SuccToSinkTo = 0; + + // FIXME: This picks a successor to sink into based on having one + // successor that dominates all the uses. However, there are cases where + // sinking can happen but where the sink point isn't a successor. For + // example: + // x = computation + // if () {} else {} + // use x + // the instruction could be sunk over the whole diamond for the + // if/then/else (or loop, etc), allowing it to be sunk into other blocks + // after that. + + // Instructions can only be sunk if all their uses are in blocks + // dominated by one of the successors. + // Look at all the successors and decide which one + // we should sink to. + for (succ_iterator SI = succ_begin(ParentBlock), + E = succ_end(ParentBlock); SI != E; ++SI) { + if (AllUsesDominatedByBlock(Inst, *SI)) { + SuccToSinkTo = *SI; + break; + } + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (SuccToSinkTo == 0) + return false; + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. 
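The isSafeToMove helper above refuses to sink a load below any store that may modify the loaded address; that is exactly the query it sends to AliasAnalysis::getModRefInfo. A small hypothetical example of the hazard it guards against:

// Sinking the load below the store is only legal if *p and *q cannot alias.
int sinkLoadHazard(int *p, int *q, bool cond) {
  int v = *p;           // load under consideration for sinking
  *q = 0;               // a store that sits below the load (already collected
                        // in Stores by the bottom-up walk)
  if (cond)
    return v;           // if the load were sunk here and p == q, v would
                        // observe the store's value instead of the original
  return 0;
}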
+ if (Inst->getParent() == SuccToSinkTo) + return false; + + DEBUG(dbgs() << "Sink instr " << *Inst); + DEBUG(dbgs() << "to block "; + WriteAsOperand(dbgs(), SuccToSinkTo, false)); + + // If the block has multiple predecessors, this would introduce computation on + // a path that it doesn't already exist. We could split the critical edge, + // but for now we just punt. + // FIXME: Split critical edges if not backedges. + if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. + if (!Inst->isSafeToSpeculativelyExecute()) { + DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); + return false; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); + return false; + } + + // Don't sink instructions into a loop. + if (LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); + return false; + } + + // Otherwise we are OK with sinking along a critical edge. + DEBUG(dbgs() << "Sinking along critical edge.\n"); + } + + // Determine where to insert into. Skip phi nodes. + BasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos)) + ++InsertPos; + + // Move the instruction. + Inst->moveBefore(InsertPos); + return true; +} diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 8ad66dd..6d4fe4b 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -344,7 +344,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { DILocation OrigLocation = ILoc.getOrigLocation(); MDNode *NewLoc = TheCallMD; if (OrigLocation.Verify()) - NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD); + NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD); Value *MDVs[] = { InsnMD->getOperand(0), // Line diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index f181f3a..13f0a28 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -861,7 +861,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Find the nearest store that has a lower than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::pair<unsigned, StoreInst*>(LoadIdx, 0), + std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), StoreIndexSearchPredicate()); // If there is no store before this load, then we can't promote this load. 
@@ -886,7 +886,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI) { DIVariable DIVar(DDI->getVariable()); - if (!DIVar.getNode()) + if (!DIVar.Verify()) return; if (!DIF) diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 25d50db..f4bdb527 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/AlignOf.h" @@ -20,40 +19,17 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; -/// BBInfo - Per-basic block information used internally by SSAUpdater. -/// The predecessors of each block are cached here since pred_iterator is -/// slow and we need to iterate over the blocks at least a few times. -class SSAUpdater::BBInfo { -public: - BasicBlock *BB; // Back-pointer to the corresponding block. - Value *AvailableVal; // Value to use in this block. - BBInfo *DefBB; // Block that defines the available value. - int BlkNum; // Postorder number. - BBInfo *IDom; // Immediate dominator. - unsigned NumPreds; // Number of predecessor blocks. - BBInfo **Preds; // Array[NumPreds] of predecessor blocks. - PHINode *PHITag; // Marker for existing PHIs that match. - - BBInfo(BasicBlock *ThisBB, Value *V) - : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0), - NumPreds(0), Preds(0), PHITag(0) { } -}; - -typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; - typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } -static BBMapTy *getBBMap(void *BM) { - return static_cast<BBMapTy*>(BM); -} - SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) - : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {} + : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete &getAvailableVals(AV); @@ -105,9 +81,7 @@ static bool IsEquivalentPHI(PHINode *PHI, /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is /// live at the end of the specified block. Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { - assert(BM == 0 && "Unexpected Internal State"); Value *Res = GetValueAtEndOfBlockInternal(BB); - assert(BM == 0 && "Unexpected Internal State"); return Res; } @@ -231,427 +205,126 @@ void SSAUpdater::RewriteUse(Use &U) { U.set(V); } -/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry -/// for the specified BB and if so, return it. If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. -Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - if (Value *V = AvailableVals[BB]) - return V; - - // Pool allocation used internally by GetValueAtEndOfBlock. - BumpPtrAllocator Allocator; - BBMapTy BBMapObj; - BM = &BBMapObj; - - SmallVector<BBInfo*, 100> BlockList; - BuildBlockList(BB, &BlockList, &Allocator); - - // Special case: bail out if BB is unreachable. 
- if (BlockList.size() == 0) { - BM = 0; - return UndefValue::get(PrototypeValue->getType()); - } - - FindDominators(&BlockList); - FindPHIPlacement(&BlockList); - FindAvailableVals(&BlockList); - - BM = 0; - return BBMapObj[BB]->DefBB->AvailableVal; +/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator +/// in the SSAUpdaterImpl template. +namespace { + class PHIiter { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHIiter(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHIiter(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHIiter &operator++() { ++idx; return *this; } + bool operator==(const PHIiter& x) const { return idx == x.idx; } + bool operator!=(const PHIiter& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; } -/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds -/// vector, set Info->NumPreds, and allocate space in Info->Preds. -static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info, - SmallVectorImpl<BasicBlock*> *Preds, - BumpPtrAllocator *Allocator) { - // We can get our predecessor info by walking the pred_iterator list, - // but it is relatively slow. If we already have PHI nodes in this - // block, walk one of them to get the predecessor list instead. - BasicBlock *BB = Info->BB; - if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { - for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) - Preds->push_back(SomePhi->getIncomingBlock(PI)); - } else { - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - Preds->push_back(*PI); +/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template, +/// specialized for SSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<SSAUpdater> { +public: + typedef BasicBlock BlkT; + typedef Value *ValT; + typedef PHINode PhiT; + + typedef succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } + + typedef PHIiter PHI_iterator; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); } - Info->NumPreds = Preds->size(); - Info->Preds = static_cast<SSAUpdater::BBInfo**> - (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*), - AlignOf<SSAUpdater::BBInfo*>::Alignment)); -} - -/// BuildBlockList - Starting from the specified basic block, traverse back -/// through its predecessors until reaching blocks with known values. Create -/// BBInfo structures for the blocks and append them to the block list. -void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList, - BumpPtrAllocator *Allocator) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - BBMapTy *BBMap = getBBMap(BM); - SmallVector<BBInfo*, 10> RootList; - SmallVector<BBInfo*, 64> WorkList; - - BBInfo *Info = new (*Allocator) BBInfo(BB, 0); - (*BBMap)[BB] = Info; - WorkList.push_back(Info); - - // Search backward from BB, creating BBInfos along the way and stopping when - // reaching blocks that define the value. Record those defining blocks on - // the RootList. 
- SmallVector<BasicBlock*, 10> Preds; - while (!WorkList.empty()) { - Info = WorkList.pop_back_val(); - Preds.clear(); - FindPredecessorBlocks(Info, &Preds, Allocator); - - // Treat an unreachable predecessor as a definition with 'undef'. - if (Info->NumPreds == 0) { - Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); - Info->DefBB = Info; - RootList.push_back(Info); - continue; - } - - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BasicBlock *Pred = Preds[p]; - // Check if BBMap already has a BBInfo for the predecessor block. - BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); - if (BBMapBucket.second) { - Info->Preds[p] = BBMapBucket.second; - continue; - } - - // Create a new BBInfo for the predecessor. - Value *PredVal = AvailableVals.lookup(Pred); - BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal); - BBMapBucket.second = PredInfo; - Info->Preds[p] = PredInfo; - - if (PredInfo->AvailableVal) { - RootList.push_back(PredInfo); - continue; - } - WorkList.push_back(PredInfo); + /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds + /// vector, set Info->NumPreds, and allocate space in Info->Preds. + static void FindPredecessorBlocks(BasicBlock *BB, + SmallVectorImpl<BasicBlock*> *Preds) { + // We can get our predecessor info by walking the pred_iterator list, + // but it is relatively slow. If we already have PHI nodes in this + // block, walk one of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { + for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI) + Preds->push_back(SomePhi->getIncomingBlock(PI)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + Preds->push_back(*PI); } } - // Now that we know what blocks are backwards-reachable from the starting - // block, do a forward depth-first traversal to assign postorder numbers - // to those blocks. - BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0); - unsigned BlkNum = 1; - - // Initialize the worklist with the roots from the backward traversal. - while (!RootList.empty()) { - Info = RootList.pop_back_val(); - Info->IDom = PseudoEntry; - Info->BlkNum = -1; - WorkList.push_back(Info); + /// GetUndefVal - Get an undefined value of the same type as the value + /// being handled. + static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) { + return UndefValue::get(Updater->PrototypeValue->getType()); } - while (!WorkList.empty()) { - Info = WorkList.back(); - - if (Info->BlkNum == -2) { - // All the successors have been handled; assign the postorder number. - Info->BlkNum = BlkNum++; - // If not a root, put it on the BlockList. - if (!Info->AvailableVal) - BlockList->push_back(Info); - WorkList.pop_back(); - continue; - } - - // Leave this entry on the worklist, but set its BlkNum to mark that its - // successors have been put on the worklist. When it returns to the top - // the list, after handling its successors, it will be assigned a number. - Info->BlkNum = -2; - - // Add unvisited successors to the work list. - for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB); - SI != E; ++SI) { - BBInfo *SuccInfo = (*BBMap)[*SI]; - if (!SuccInfo || SuccInfo->BlkNum) - continue; - SuccInfo->BlkNum = -1; - WorkList.push_back(SuccInfo); - } + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. + /// Reserve space for the operands but do not fill them in yet. 
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, + SSAUpdater *Updater) { + PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(), + Updater->PrototypeValue->getName(), + &BB->front()); + PHI->reserveOperandSpace(NumPreds); + return PHI; } - PseudoEntry->BlkNum = BlkNum; -} -/// IntersectDominators - This is the dataflow lattice "meet" operation for -/// finding dominators. Given two basic blocks, it walks up the dominator -/// tree until it finds a common dominator of both. It uses the postorder -/// number of the blocks to determine how to do that. -static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1, - SSAUpdater::BBInfo *Blk2) { - while (Blk1 != Blk2) { - while (Blk1->BlkNum < Blk2->BlkNum) { - Blk1 = Blk1->IDom; - if (!Blk1) - return Blk2; - } - while (Blk2->BlkNum < Blk1->BlkNum) { - Blk2 = Blk2->IDom; - if (!Blk2) - return Blk1; - } + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) { + PHI->addIncoming(Val, Pred); } - return Blk1; -} -/// FindDominators - Calculate the dominator tree for the subset of the CFG -/// corresponding to the basic blocks on the BlockList. This uses the -/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and -/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10. -/// Because the CFG subset does not include any edges leading into blocks that -/// define the value, the results are not the usual dominator tree. The CFG -/// subset has a single pseudo-entry node with edges to a set of root nodes -/// for blocks that define the value. The dominators for this subset CFG are -/// not the standard dominators but they are adequate for placing PHIs within -/// the subset CFG. -void SSAUpdater::FindDominators(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // Start with the first predecessor. - assert(Info->NumPreds > 0 && "unreachable block"); - BBInfo *NewIDom = Info->Preds[0]; - - // Iterate through the block's other predecessors. - for (unsigned p = 1; p != Info->NumPreds; ++p) { - BBInfo *Pred = Info->Preds[p]; - NewIDom = IntersectDominators(NewIDom, Pred); - } - - // Check if the IDom value has changed. - if (NewIDom != Info->IDom) { - Info->IDom = NewIDom; - Changed = true; - } - } - } while (Changed); -} - -/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for -/// any blocks containing definitions of the value. If one is found, then the -/// successor of Pred is in the dominance frontier for the definition, and -/// this function returns true. -static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred, - const SSAUpdater::BBInfo *IDom) { - for (; Pred != IDom; Pred = Pred->IDom) { - if (Pred->DefBB == Pred) - return true; + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static PHINode *InstrIsPHI(Instruction *I) { + return dyn_cast<PHINode>(I); } - return false; -} - -/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of -/// the known definitions. Iteratively add PHIs in the dom frontiers until -/// nothing changes. Along the way, keep track of the nearest dominating -/// definitions for non-PHI blocks. 
-void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) { - bool Changed; - do { - Changed = false; - // Iterate over the list in reverse order, i.e., forward on CFG edges. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - // If this block already needs a PHI, there is nothing to do here. - if (Info->DefBB == Info) - continue; - - // Default to use the same def as the immediate dominator. - BBInfo *NewDefBB = Info->IDom->DefBB; - for (unsigned p = 0; p != Info->NumPreds; ++p) { - if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) { - // Need a PHI here. - NewDefBB = Info; - break; - } - } - - // Check if anything changed. - if (NewDefBB != Info->DefBB) { - Info->DefBB = NewDefBB; - Changed = true; - } - } - } while (Changed); -} - -/// FindAvailableVal - If this block requires a PHI, first check if an existing -/// PHI matches the PHI placement and reaching definitions computed earlier, -/// and if not, create a new PHI. Visit all the block's predecessors to -/// calculate the available value for each one and fill in the incoming values -/// for a new PHI. -void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) { - AvailableValsTy &AvailableVals = getAvailableVals(AV); - // Go through the worklist in forward order (i.e., backward through the CFG) - // and check if existing PHIs can be used. If not, create empty PHIs where - // they are needed. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) { - BBInfo *Info = *I; - // Check if there needs to be a PHI in BB. - if (Info->DefBB != Info) - continue; - - // Look for an existing PHI. - FindExistingPHI(Info->BB, BlockList); - if (Info->AvailableVal) - continue; - - PHINode *PHI = PHINode::Create(PrototypeValue->getType(), - PrototypeValue->getName(), - &Info->BB->front()); - PHI->reserveOperandSpace(Info->NumPreds); - Info->AvailableVal = PHI; - AvailableVals[Info->BB] = PHI; + /// ValueIsPHI - Check if a value is a PHI. + /// + static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { + return dyn_cast<PHINode>(Val); } - // Now go back through the worklist in reverse order to fill in the arguments - // for any new PHIs added in the forward traversal. - for (BlockListTy::reverse_iterator I = BlockList->rbegin(), - E = BlockList->rend(); I != E; ++I) { - BBInfo *Info = *I; - - if (Info->DefBB != Info) { - // Record the available value at join nodes to speed up subsequent - // uses of this SSAUpdater for the same value. - if (Info->NumPreds > 1) - AvailableVals[Info->BB] = Info->DefBB->AvailableVal; - continue; - } - - // Check if this block contains a newly added PHI. - PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal); - if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds) - continue; - - // Iterate through the block's predecessors. - for (unsigned p = 0; p != Info->NumPreds; ++p) { - BBInfo *PredInfo = Info->Preds[p]; - BasicBlock *Pred = PredInfo->BB; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - PHI->addIncoming(PredInfo->AvailableVal, Pred); - } - - DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); - - // If the client wants to know about all new instructions, tell it. - if (InsertedPHIs) InsertedPHIs->push_back(PHI); + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. 
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) { + PHINode *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumIncomingValues() == 0) + return PHI; + return 0; } -} -/// FindExistingPHI - Look through the PHI nodes in a block to see if any of -/// them match what is needed. -void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) { - PHINode *SomePHI; - for (BasicBlock::iterator It = BB->begin(); - (SomePHI = dyn_cast<PHINode>(It)); ++It) { - if (CheckIfPHIMatches(SomePHI)) { - RecordMatchingPHI(SomePHI); - break; - } - // Match failed: clear all the PHITag values. - for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end(); - I != E; ++I) - (*I)->PHITag = 0; + /// GetPHIValue - For the specified PHI instruction, return the value + /// that it defines. + static Value *GetPHIValue(PHINode *PHI) { + return PHI; } -} +}; -/// CheckIfPHIMatches - Check if a PHI node matches the placement and values -/// in the BBMap. -bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Mark that the block containing this PHI has been visited. - (*BBMap)[PHI->getParent()]->PHITag = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - Value *IncomingVal = PHI->getIncomingValue(i); - BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)]; - // Skip to the nearest preceding definition. - if (PredInfo->DefBB != PredInfo) - PredInfo = PredInfo->DefBB; - - // Check if it matches the expected value. - if (PredInfo->AvailableVal) { - if (IncomingVal == PredInfo->AvailableVal) - continue; - return false; - } - - // Check if the value is a PHI in the correct block. - PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); - if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB) - return false; - - // If this block has already been visited, check if this PHI matches. - if (PredInfo->PHITag) { - if (IncomingPHIVal == PredInfo->PHITag) - continue; - return false; - } - PredInfo->PHITag = IncomingPHIVal; - - WorkList.push_back(IncomingPHIVal); - } - } - return true; -} +} // End llvm namespace -/// RecordMatchingPHI - For a PHI node that matches, record it and its input -/// PHIs in both the BBMap and the AvailableVals mapping. -void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { - BBMapTy *BBMap = getBBMap(BM); +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { AvailableValsTy &AvailableVals = getAvailableVals(AV); - SmallVector<PHINode*, 20> WorkList; - WorkList.push_back(PHI); - - // Record this PHI. - BasicBlock *BB = PHI->getParent(); - AvailableVals[BB] = PHI; - (*BBMap)[BB]->AvailableVal = PHI; - - while (!WorkList.empty()) { - PHI = WorkList.pop_back_val(); - - // Iterate through the PHI's incoming values. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { - PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); - if (!IncomingPHIVal) continue; - BB = IncomingPHIVal->getParent(); - BBInfo *Info = (*BBMap)[BB]; - if (!Info || Info->AvailableVal) - continue; - - // Record the PHI and add it to the worklist. 
- AvailableVals[BB] = IncomingPHIVal; - Info->AvailableVal = IncomingPHIVal; - WorkList.push_back(IncomingPHIVal); - } - } + if (Value *V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); }
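The SSAUpdater rewrite above moves the PHI-placement algorithm into the shared SSAUpdaterImpl<UpdaterT> template and customizes it purely through the SSAUpdaterTraits specialization shown in the diff. As a rough, self-contained toy of that traits pattern (all names below are hypothetical, not the real LLVM headers):

#include <vector>

// Toy illustration: a generic driver template talks to its client only through
// a Traits specialization, so one algorithm can serve several client types.
template <typename ClientT> struct UpdaterTraits; // specialized per client type

template <typename ClientT> class GenericDriver {
public:
  typedef typename UpdaterTraits<ClientT>::BlockT BlockT;
  typedef typename UpdaterTraits<ClientT>::ValueT ValueT;

  explicit GenericDriver(ClientT *C) : Client(C) {}

  // Stand-in for GetValue(): blocks with no predecessors get the client's
  // "undef" value; otherwise forward the first predecessor's value.
  ValueT valueFor(BlockT *BB) {
    std::vector<BlockT *> Preds;
    UpdaterTraits<ClientT>::findPredecessors(BB, Preds);
    if (Preds.empty())
      return UpdaterTraits<ClientT>::undefValue(Client);
    return Client->availableValue(Preds.front());
  }

private:
  ClientT *Client;
};

// A concrete client and its traits specialization.
struct ToyBlock { std::vector<ToyBlock *> Preds; int Val; };

struct ToyClient {
  int availableValue(ToyBlock *BB) { return BB->Val; }
};

template <> struct UpdaterTraits<ToyClient> {
  typedef ToyBlock BlockT;
  typedef int ValueT;
  static void findPredecessors(ToyBlock *BB, std::vector<ToyBlock *> &Out) {
    Out = BB->Preds;
  }
  static int undefValue(ToyClient *) { return -1; }
};

// Usage: the same driver works for any client with a matching traits specialization.
int valueAtEntry(ToyClient &C, ToyBlock &Entry) {
  GenericDriver<ToyClient> Driver(&C);
  return Driver.valueFor(&Entry);
}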