diff options
Diffstat (limited to 'lib/Analysis')
-rw-r--r-- | lib/Analysis/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Analysis/CaptureTracking.cpp | 20 | ||||
-rw-r--r-- | lib/Analysis/ConstantFolding.cpp | 15 | ||||
-rw-r--r-- | lib/Analysis/DebugInfo.cpp | 68 | ||||
-rw-r--r-- | lib/Analysis/IPA/Andersens.cpp | 2 | ||||
-rw-r--r-- | lib/Analysis/IVUsers.cpp | 5 | ||||
-rw-r--r-- | lib/Analysis/LoopInfo.cpp | 6 | ||||
-rw-r--r-- | lib/Analysis/MemoryDependenceAnalysis.cpp | 525 | ||||
-rw-r--r-- | lib/Analysis/PHITransAddr.cpp | 432 | ||||
-rw-r--r-- | lib/Analysis/ProfileEstimatorPass.cpp | 146 | ||||
-rw-r--r-- | lib/Analysis/ProfileInfo.cpp | 999 | ||||
-rw-r--r-- | lib/Analysis/ProfileInfoLoaderPass.cpp | 56 | ||||
-rw-r--r-- | lib/Analysis/ProfileVerifierPass.cpp | 519 | ||||
-rw-r--r-- | lib/Analysis/ScalarEvolutionExpander.cpp | 6 | ||||
-rw-r--r-- | lib/Analysis/ValueTracking.cpp | 2 |
15 files changed, 2026 insertions, 776 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 0a83c3d..17c9b86 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis LoopPass.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + PHITransAddr.cpp PointerTracking.cpp PostDominators.cpp ProfileEstimatorPass.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index a276c64..10a8b11 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -25,6 +25,16 @@ #include "llvm/Support/CallSite.h" using namespace llvm; +/// As its comment mentions, PointerMayBeCaptured can be expensive. +/// However, it's not easy for BasicAA to cache the result, because +/// it's an ImmutablePass. To work around this, bound queries at a +/// fixed number of uses. +/// +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures @@ -35,11 +45,17 @@ using namespace llvm; bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!"); - SmallVector<Use*, 16> Worklist; - SmallSet<Use*, 16> Visited; + SmallVector<Use*, Threshold> Worklist; + SmallSet<Use*, Threshold> Visited; + int Count = 0; for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return true; + Use *U = &UI.getUse(); Visited.insert(U); Worklist.push_back(U); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 96f738e..eaf90d0 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -432,7 +432,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. std::string Str; - if (TD && GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { + if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { unsigned StrLen = Str.length(); const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); @@ -569,9 +569,16 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, SmallVector<Constant*, 32> NewIdxs; do { if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { - // The only pointer indexing we'll do is on the first index of the GEP. - if (isa<PointerType>(ATy) && !NewIdxs.empty()) - break; + if (isa<PointerType>(ATy)) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); if (ElemSize == 0) diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 41d803c..1c9f500 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -866,7 +866,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, - bool isDefinition) { + bool isDefinition, + unsigned VK, unsigned VIndex, + DIType ContainingType) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), @@ -879,9 +881,38 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Type.getNode(), ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition) + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), + ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), + ContainingType.getNode() }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 11)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 14)); +} + +/// CreateSubprogramDefinition - Create new subprogram descriptor for the +/// given declaration. +DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) { + if (SPDeclaration.isDefinition()) + return DISubprogram(SPDeclaration.getNode()); + + MDNode *DeclNode = SPDeclaration.getNode(); + Value *Elts[] = { + GetTagConstant(dwarf::DW_TAG_subprogram), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + DeclNode->getElement(2), // Context + DeclNode->getElement(3), // Name + DeclNode->getElement(4), // DisplayName + DeclNode->getElement(5), // LinkageName + DeclNode->getElement(6), // CompileUnit + DeclNode->getElement(7), // LineNo + DeclNode->getElement(8), // Type + DeclNode->getElement(9), // isLocalToUnit + ConstantInt::get(Type::getInt1Ty(VMContext), true), + DeclNode->getElement(11), // Virtuality + DeclNode->getElement(12), // VIndex + DeclNode->getElement(13) // Containting Type + }; + return DISubprogram(MDNode::get(VMContext, &Elts[0], 14)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. @@ -1019,6 +1050,37 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); } +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset, + DIVariable D, + Instruction *InsertBefore) { + assert(V && "no value passed to dbg.value"); + assert(Offset->getType() == Type::getInt64Ty(V->getContext()) && + "offset must be i64"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Elts[] = { V }; + Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset, + D.getNode() }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); +} + +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset, + DIVariable D, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(Offset->getType() == Type::getInt64Ty(V->getContext()) && + "offset must be i64"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Elts[] = { V }; + Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset, + D.getNode() }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); +} //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index e12db81..4d5b312 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -121,8 +121,6 @@ namespace { return *LHS == *RHS; } - - static bool isPod() { return true; } }; class Andersens : public ModulePass, public AliasAnalysis, diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 37747b6..627dbbb 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -53,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -307,6 +307,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) AddUsersIfInteresting(I); + Processed.clear(); return false; } @@ -369,7 +370,7 @@ void IVUsers::dump() const { void IVUsers::releaseMemory() { IVUsesByStride.clear(); StrideOrder.clear(); - Processed.clear(); + IVUses.clear(); } void IVStrideUse::deleted() { diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 4de756c..34089ee 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -316,12 +316,12 @@ bool Loop::hasDedicatedExits() const { /// getUniqueExitBlocks - Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. -/// This assumes that loop is in canonical form. +/// This assumes that loop exits are in canonical form. /// void Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { - assert(isLoopSimplifyForm() && - "getUniqueExitBlocks assumes the loop is in canonical form!"); + assert(hasDedicatedExits() && + "getUniqueExitBlocks assumes the loop has canonical form exits!"); // Sort the blocks vector so that we can use binary search to do quick // lookups. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index ae6f970..a0c7706 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" @@ -172,7 +173,7 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *invariantTag = 0; + Value *InvariantTag = 0; // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -180,34 +181,36 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If we're in an invariant region, no dependencies can be found before // we pass an invariant-begin marker. - if (invariantTag == Inst) { - invariantTag = 0; + if (InvariantTag == Inst) { + InvariantTag = 0; continue; - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // Debug intrinsics don't cause dependences. + if (isa<DbgInfoIntrinsic>(Inst)) continue; + // If we pass an invariant-end marker, then we've just entered an // invariant region and can start ignoring dependencies. if (II->getIntrinsicID() == Intrinsic::invariant_end) { - uint64_t invariantSize = ~0ULL; - if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2))) - invariantSize = CI->getZExtValue(); - - AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(3), invariantSize, MemPtr, MemSize); + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point. + AliasAnalysis::AliasResult R = + AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) { - invariantTag = II->getOperand(1); + InvariantTag = II->getOperand(1); continue; } // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - } else if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - uint64_t invariantSize = ~0ULL; - if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1))) - invariantSize = CI->getZExtValue(); - + } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(2), invariantSize, MemPtr, MemSize); + AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); } @@ -215,10 +218,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If we're querying on a load and we're in an invariant region, we're done // at this point. Nothing a load depends on can live in an invariant region. - if (isLoad && invariantTag) continue; - - // Debug intrinsics don't cause dependences. - if (isa<DbgInfoIntrinsic>(Inst)) continue; + if (isLoad && InvariantTag) continue; // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. @@ -243,7 +243,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // There can't be stores to the value we care about inside an // invariant region. - if (invariantTag) continue; + if (InvariantTag) continue; // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where @@ -292,7 +292,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, case AliasAnalysis::Mod: // If we're in an invariant region, we can ignore calls that ONLY // modify the pointer. - if (invariantTag) continue; + if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a @@ -374,21 +374,22 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { IntrinsicID = II->getIntrinsicID(); switch (IntrinsicID) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - MemPtr = QueryInst->getOperand(2); - MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue(); - break; - case Intrinsic::invariant_end: - MemPtr = QueryInst->getOperand(3); - MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue(); - break; - default: - CallSite QueryCS = CallSite::get(QueryInst); - bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + MemPtr = QueryInst->getOperand(2); + MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue(); + break; + case Intrinsic::invariant_end: + MemPtr = QueryInst->getOperand(3); + MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue(); + break; + default: + CallSite QueryCS = CallSite::get(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + break; } } else { // Non-memory instruction. @@ -421,7 +422,7 @@ static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, if (Count == 0) return; for (unsigned i = 1; i != unsigned(Count); ++i) - assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!"); + assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); } #endif @@ -462,8 +463,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // determine what is dirty, seeding our initial DirtyBlocks worklist. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) - if (I->second.isDirty()) - DirtyBlocks.push_back(I->first); + if (I->getResult().isDirty()) + DirtyBlocks.push_back(I->getBB()); // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); @@ -501,27 +502,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DEBUG(AssertSorted(Cache, NumSortedEntries)); NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, - std::make_pair(DirtyBB, MemDepResult())); - if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB) + NonLocalDepEntry(DirtyBB)); + if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - MemDepResult *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache.begin()+NumSortedEntries && - Entry->first == DirtyBB) { + Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. - if (!Entry->second.isDirty()) + if (!Entry->getResult().isDirty()) continue; // Otherwise, remember this slot so we can update the value. - ExistingResult = &Entry->second; + ExistingResult = &*Entry; } // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); if (ExistingResult) { - if (Instruction *Inst = ExistingResult->getInst()) { + if (Instruction *Inst = ExistingResult->getResult().getInst()) { ScanPos = Inst; // We're removing QueryInst's use of Inst. RemoveFromReverseMap(ReverseNonLocalDeps, Inst, @@ -545,9 +546,9 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) - *ExistingResult = Dep; + ExistingResult->setResult(Dep, 0); else - Cache.push_back(std::make_pair(DirtyBB, Dep)); + Cache.push_back(NonLocalDepEntry(DirtyBB, Dep, 0)); // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! @@ -587,17 +588,20 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType(); uint64_t PointeeSize = AA->getTypeStoreSize(EltTy); + PHITransAddr Address(Pointer, TD); + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); - Result.push_back(std::make_pair(FromBB, - MemDepResult::getClobber(FromBB->begin()))); + Result.push_back(NonLocalDepEntry(FromBB, + MemDepResult::getClobber(FromBB->begin()), + Pointer)); } /// GetNonLocalInfoForBlock - Compute the memdep value for BB with @@ -613,30 +617,30 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, - std::make_pair(BB, MemDepResult())); - if (Entry != Cache->begin() && prior(Entry)->first == BB) + NonLocalDepEntry(BB)); + if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - MemDepResult *ExistingResult = 0; - if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB) - ExistingResult = &Entry->second; + NonLocalDepEntry *ExistingResult = 0; + if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) + ExistingResult = &*Entry; // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. - if (ExistingResult && !ExistingResult->isDirty()) { + if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; - return *ExistingResult; + return ExistingResult->getResult(); } // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. BasicBlock::iterator ScanPos = BB->end(); - if (ExistingResult && ExistingResult->getInst()) { - assert(ExistingResult->getInst()->getParent() == BB && + if (ExistingResult && ExistingResult->getResult().getInst()) { + assert(ExistingResult->getResult().getInst()->getParent() == BB && "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; - ScanPos = ExistingResult->getInst(); + ScanPos = ExistingResult->getResult().getInst(); // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Pointer, isLoad); @@ -652,9 +656,9 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) - *ExistingResult = Dep; + ExistingResult->setResult(Dep, Pointer); else - Cache->push_back(std::make_pair(BB, Dep)); + Cache->push_back(NonLocalDepEntry(BB, Dep, Pointer)); // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it @@ -683,7 +687,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, break; case 2: { // Two new entries, insert the last one into place. - MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.end()-1, Val); @@ -693,7 +697,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, case 1: // One new entry, Just insert the new value at the appropriate position. if (Cache.size() != 1) { - MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.end(), Val); @@ -707,275 +711,6 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, } } -/// isPHITranslatable - Return true if the specified computation is derived from -/// a PHI node in the current block and if it is simple enough for us to handle. -static bool isPHITranslatable(Instruction *Inst) { - if (isa<PHINode>(Inst)) - return true; - - // We can handle bitcast of a PHI, but the PHI needs to be in the same block - // as the bitcast. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - return true; - return isPHITranslatable(OpI); - } - - // We can translate a GEP if all of its operands defined in this block are phi - // translatable. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - continue; - - if (!isPHITranslatable(OpI)) - return false; - } - return true; - } - - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - return true; - return isPHITranslatable(OpI); - } - - // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; - // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) - // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); - - return false; -} - -/// GetPHITranslatedValue - Given a computation that satisfied the -/// isPHITranslatable predicate, see if we can translate the computation into -/// the specified predecessor block. If so, return that value. -Value *MemoryDependenceAnalysis:: -GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred, - const TargetData *TD) const { - // If the input value is not an instruction, or if it is not defined in CurBB, - // then we don't need to phi translate it. - Instruction *Inst = dyn_cast<Instruction>(InVal); - if (Inst == 0 || Inst->getParent() != CurBB) - return InVal; - - if (PHINode *PN = dyn_cast<PHINode>(Inst)) - return PN->getIncomingValueForBlock(Pred); - - // Handle bitcast of PHI. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - // PHI translate the input operand. - Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD); - if (PHIIn == 0) return 0; - - // Constants are trivial to phi translate. - if (Constant *C = dyn_cast<Constant>(PHIIn)) - return ConstantExpr::getBitCast(C, BC->getType()); - - // Otherwise we have to see if a bitcasted version of the incoming pointer - // is available. If so, we can use it, otherwise we have to fail. - for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) - if (BCI->getType() == BC->getType()) - return BCI; - } - return 0; - } - - // Handle getelementptr with at least one PHI translatable operand. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - SmallVector<Value*, 8> GEPOps; - BasicBlock *CurBB = GEP->getParent(); - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *GEPOp = GEP->getOperand(i); - // No PHI translation is needed of operands whose values are live in to - // the predecessor block. - if (!isa<Instruction>(GEPOp) || - cast<Instruction>(GEPOp)->getParent() != CurBB) { - GEPOps.push_back(GEPOp); - continue; - } - - // If the operand is a phi node, do phi translation. - Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD); - if (InOp == 0) return 0; - - GEPOps.push_back(InOp); - } - - // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) - return V; - - // Scan to see if we have this GEP available. - Value *APHIOp = GEPOps[0]; - for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); - UI != E; ++UI) { - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) - if (GEPI->getType() == GEP->getType() && - GEPI->getNumOperands() == GEPOps.size() && - GEPI->getParent()->getParent() == CurBB->getParent()) { - bool Mismatch = false; - for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) - if (GEPI->getOperand(i) != GEPOps[i]) { - Mismatch = true; - break; - } - if (!Mismatch) - return GEPI; - } - } - return 0; - } - - // Handle add with a constant RHS. - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - // PHI translate the LHS. - Value *LHS; - Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); - Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0)); - bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); - bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); - - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - LHS = Inst->getOperand(0); - else { - LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD); - if (LHS == 0) - return 0; - } - - // If the PHI translated LHS is an add of a constant, fold the immediates. - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) - if (BOp->getOpcode() == Instruction::Add) - if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { - LHS = BOp->getOperand(0); - RHS = ConstantExpr::getAdd(RHS, CI); - isNSW = isNUW = false; - } - - // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) - return Res; - - // Otherwise, see if we have this add available somewhere. - for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); - UI != E; ++UI) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) - if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && - BO->getParent()->getParent() == CurBB->getParent()) - return BO; - } - - return 0; - } - - return 0; -} - -/// GetAvailablePHITranslatePointer - Return the value computed by -/// PHITranslatePointer if it dominates PredBB, otherwise return null. -Value *MemoryDependenceAnalysis:: -GetAvailablePHITranslatedValue(Value *V, - BasicBlock *CurBB, BasicBlock *PredBB, - const TargetData *TD, - const DominatorTree &DT) const { - // See if PHI translation succeeds. - V = GetPHITranslatedValue(V, CurBB, PredBB, TD); - if (V == 0) return 0; - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) - if (!DT.dominates(Inst->getParent(), PredBB)) - return 0; - return V; -} - - -/// InsertPHITranslatedPointer - Insert a computation of the PHI translated -/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB -/// block. All newly created instructions are added to the NewInsts list. -/// -Value *MemoryDependenceAnalysis:: -InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB, - BasicBlock *PredBB, const TargetData *TD, - const DominatorTree &DT, - SmallVectorImpl<Instruction*> &NewInsts) const { - // See if we have a version of this value already available and dominating - // PredBB. If so, there is no need to insert a new copy. - if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT)) - return Res; - - // If we don't have an available version of this value, it must be an - // instruction. - Instruction *Inst = cast<Instruction>(InVal); - - // Handle bitcast of PHI translatable value. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - - // Otherwise insert a bitcast at the end of PredBB. - BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - NewInsts.push_back(New); - return New; - } - - // Handle getelementptr with at least one PHI operand. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - SmallVector<Value*, 8> GEPOps; - BasicBlock *CurBB = GEP->getParent(); - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - GEPOps.push_back(OpVal); - } - - GetElementPtrInst *Result = - GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - Result->setIsInBounds(GEP->isInBounds()); - NewInsts.push_back(Result); - return Result; - } - -#if 0 - // FIXME: This code works, but it is unclear that we actually want to insert - // a big chain of computation in order to make a value available in a block. - // This needs to be evaluated carefully to consider its cost trade offs. - - // Handle add with a constant RHS. - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - // PHI translate the LHS. - Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - - BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); - Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); - NewInsts.push_back(Res); - return Res; - } -#endif - - return 0; -} - /// getNonLocalPointerDepFromBB - Perform a dependency query based on /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def /// results to the results vector and keep track of which blocks are visited in @@ -989,14 +724,14 @@ InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB, /// not compute dependence information for some reason. This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, +getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepEntry> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { // Look up the cached info for Pointer. - ValueIsLoadPair CacheKey(Pointer, isLoad); + ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo = &NonLocalPointerDeps[CacheKey]; @@ -1013,8 +748,9 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, if (!Visited.empty()) { for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { - DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first); - if (VI == Visited.end() || VI->second == Pointer) continue; + DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); + if (VI == Visited.end() || VI->second == Pointer.getAddr()) + continue; // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a @@ -1025,8 +761,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { - Visited.insert(std::make_pair(I->first, Pointer)); - if (!I->second.isNonLocal()) + Visited.insert(std::make_pair(I->getBB(), Pointer.getAddr())); + if (!I->getResult().isNonLocal()) Result.push_back(*I); } ++NumCacheCompleteNonLocalPtr; @@ -1065,30 +801,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad, - BB, Cache, NumSortedEntries); + MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize, + isLoad, BB, Cache, + NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal()) { - Result.push_back(NonLocalDepEntry(BB, Dep)); + Result.push_back(NonLocalDepEntry(BB, Dep, Pointer.getAddr())); continue; } } // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. - // If phi translation fails, then we can't continue dependence analysis. - Instruction *PtrInst = dyn_cast<Instruction>(Pointer); - bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB; - - // If no PHI translation is needed, just add all the predecessors of this - // block to scan them as well. - if (!NeedsPHITranslation) { + // If not, we just add the predecessors to the worklist and scan them with + // the same Pointer. + if (!Pointer.NeedsPHITranslationFromBlock(BB)) { SkipFirstBlock = false; for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { // Verify that we haven't looked at this block yet. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> - InsertRes = Visited.insert(std::make_pair(*PI, Pointer)); + InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); if (InsertRes.second) { // First time we've looked at *PI. Worklist.push_back(*PI); @@ -1098,16 +831,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. - if (InsertRes.first->second != Pointer) + if (InsertRes.first->second != Pointer.getAddr()) goto PredTranslationFailure; } continue; } - // If we do need to do phi translation, then there are a bunch of different - // cases, because we have to find a Value* live in the predecessor block. We - // know that PtrInst is defined in this block at least. - + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. + if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. // Sort it now (if needed) so that recursive invocations of @@ -1117,19 +851,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, SortNonLocalDepInfoCache(*Cache, NumSortedEntries); NumSortedEntries = Cache->size(); } - - // If this is a computation derived from a PHI node, use the suitably - // translated incoming values for each pred as the phi translated version. - if (!isPHITranslatable(PtrInst)) - goto PredTranslationFailure; - Cache = 0; - + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; - // Get the PHI translated pointer in this predecessor. This can fail and - // return null if not translatable. - Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD); + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr PredPointer(Pointer); + PredPointer.PHITranslateValue(BB, Pred); + + Value *PredPtrVal = PredPointer.getAddr(); // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur @@ -1137,12 +869,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // the successor translates to a pointer value different than the // pointer the block was first analyzed with. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> - InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); if (!InsertRes.second) { // If the predecessor was visited with PredPtr, then we already did // the analysis and can ignore it. - if (InsertRes.first->second == PredPtr) + if (InsertRes.first->second == PredPtrVal) continue; // Otherwise, the block was previously analyzed with a different @@ -1155,10 +887,11 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtr == 0) { + if (PredPtrVal == 0) { // Add the entry to the Result list. NonLocalDepEntry Entry(Pred, - MemDepResult::getClobber(Pred->getTerminator())); + MemDepResult::getClobber(Pred->getTerminator()), + PredPtrVal); Result.push_back(Entry); // Add it to the cache for this CacheKey so that subsequent queries get @@ -1167,27 +900,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, MemoryDependenceAnalysis::NonLocalDepInfo::iterator It = std::upper_bound(Cache->begin(), Cache->end(), Entry); - if (It != Cache->begin() && prior(It)->first == Pred) + if (It != Cache->begin() && (It-1)->getBB() == Pred) --It; - if (It == Cache->end() || It->first != Pred) { + if (It == Cache->end() || It->getBB() != Pred) { Cache->insert(It, Entry); // Add it to the reverse map. ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); - } else if (!It->second.isDirty()) { + } else if (!It->getResult().isDirty()) { // noop - } else if (It->second.getInst() == Pred->getTerminator()) { + } else if (It->getResult().getInst() == Pred->getTerminator()) { // Same instruction, clear the dirty marker. - It->second = Entry.second; - } else if (It->second.getInst() == 0) { + It->setResult(Entry.getResult(), PredPtrVal); + } else if (It->getResult().getInst() == 0) { // Dirty, with no instruction, just add this. - It->second = Entry.second; + It->setResult(Entry.getResult(), PredPtrVal); ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); } else { // Otherwise, dirty with a different instruction. - RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(), - CacheKey); - It->second = Entry.second; + RemoveFromReverseMap(ReverseNonLocalPtrDeps, + It->getResult().getInst(), CacheKey); + It->setResult(Entry.getResult(),PredPtrVal); ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); } Cache = 0; @@ -1201,7 +934,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we have a problem phi translating, fall through to the code below // to handle the failure condition. - if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, + if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred, Result, Visited)) goto PredTranslationFailure; } @@ -1245,12 +978,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); - if (I->first != BB) + if (I->getBB() != BB) continue; - assert(I->second.isNonLocal() && + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); - I->second = MemDepResult::getClobber(BB->begin()); + I->setResult(MemDepResult::getClobber(BB->begin()), Pointer.getAddr()); ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey); Result.push_back(*I); break; @@ -1276,9 +1009,9 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { NonLocalDepInfo &PInfo = It->second.second; for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { - Instruction *Target = PInfo[i].second.getInst(); + Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. - assert(Target->getParent() == PInfo[i].first); + assert(Target->getParent() == PInfo[i].getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); @@ -1315,7 +1048,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { NonLocalDepInfo &BlockMap = NLDI->second.first; for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); DI != DE; ++DI) - if (Instruction *Inst = DI->second.getInst()) + if (Instruction *Inst = DI->getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); NonLocalDeps.erase(NLDI); } @@ -1403,10 +1136,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { - if (DI->second.getInst() != RemInst) continue; + if (DI->getResult().getInst() != RemInst) continue; // Convert to a dirty entry for the subsequent instruction. - DI->second = NewDirtyVal; + DI->setResult(NewDirtyVal, DI->getAddress()); if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); @@ -1445,10 +1178,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { - if (DI->second.getInst() != RemInst) continue; + if (DI->getResult().getInst() != RemInst) continue; // Convert to a dirty entry for the subsequent instruction. - DI->second = NewDirtyVal; + DI->setResult(NewDirtyVal, DI->getAddress()); if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); @@ -1489,7 +1222,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { const NonLocalDepInfo &Val = I->second.second; for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); II != E; ++II) - assert(II->second.getInst() != D && "Inst occurs as NLPD value"); + assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), @@ -1498,7 +1231,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { const PerInstNLInfo &INLD = I->second; for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), EE = INLD.first.end(); II != EE; ++II) - assert(II->second.getInst() != D && "Inst occurs in data structures"); + assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 0000000..07e2919 --- /dev/null +++ b/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,432 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static bool CanPHITrans(Instruction *Inst) { + if (isa<PHINode>(Inst) || + isa<BitCastInst>(Inst) || + isa<GetElementPtrInst>(Inst)) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) + return true; + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +void PHITransAddr::dump() const { + if (Addr == 0) { + errs() << "PHITransAddr: null\n"; + return; + } + errs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} + + +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl<Instruction*> &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast<Instruction>(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in Tmp or its operands recursively + // are. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr, either " + "something is missing from InstInputs or CanPHITrans is wrong:\n"; + errs() << *I << '\n'; + return false; + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + if (!Tmp.empty()) { + errs() << "PHITransAddr inconsistent, contains extra instructions:\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + return false; + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + Instruction *Inst = dyn_cast<Instruction>(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl<Instruction*> &InstInputs) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return; + + // If the instruction is in the InstInputs list, remove it. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return; + } + + assert(!isa<PHINode>(I) && "Error, removing something that isn't an input"); + + // Otherwise, it must have instruction inputs itself. Zap them recursively. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i))) + RemoveInstInputs(Op, InstInputs); + } +} + +Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, + BasicBlock *PredBB) { + // If this is a non-instruction value, it can't require PHI translation. + Instruction *Inst = dyn_cast<Instruction>(V); + if (Inst == 0) return V; + + // Determine whether 'Inst' is an input to our PHI translatable expression. + bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + + // Handle inputs instructions if needed. + if (isInput) { + if (Inst->getParent() != CurBB) { + // If it is an input defined in a different block, then it remains an + // input. + return Inst; + } + + // If 'Inst' is defined in this block and is an input that needs to be phi + // translated, we need to incorporate the value into the expression or fail. + + // In either case, the instruction itself isn't an input any longer. + InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); + + // If this is a PHI, go ahead and translate it. + if (PHINode *PN = dyn_cast<PHINode>(Inst)) + return AddAsInput(PN->getIncomingValueForBlock(PredBB)); + + // If this is a non-phi value, and it is analyzable, we can incorporate it + // into the expression by making all instruction operands be inputs. + if (!CanPHITrans(Inst)) + return 0; + + // All instruction operands are now inputs (and of course, they may also be + // defined in this block, so they may need to be phi translated themselves. + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i))) + InstInputs.push_back(Op); + } + + // Ok, it must be an intermediate result (either because it started that way + // or because we just incorporated it into the expression). See if its + // operands need to be phi translated, and if so, reconstruct it. + + if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { + Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB); + if (PHIIn == 0) return 0; + if (PHIIn == BC->getOperand(0)) + return BC; + + // Find an available version of this cast. + + // Constants are trivial to find. + if (Constant *C = dyn_cast<Constant>(PHIIn)) + return AddAsInput(ConstantExpr::getBitCast(C, BC->getType())); + + // Otherwise we have to see if a bitcasted version of the incoming pointer + // is available. If so, we can use it, otherwise we have to fail. + for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) + if (BCI->getType() == BC->getType()) + return BCI; + } + return 0; + } + + // Handle getelementptr with at least one PHI translatable operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent()) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); + bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); + bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent()) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to Pred, updating our state the reflect any needed changes. This +/// returns true on failure and sets Addr to null. +bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) { + assert(Verify() && "Invalid PHITransAddr!"); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB); + assert(Verify() && "Invalid PHITransAddr!"); + return Addr == 0; +} + +/// GetAvailablePHITranslatedSubExpr - Return the value computed by +/// PHITranslateSubExpr if it dominates PredBB, otherwise return null. +Value *PHITransAddr:: +GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB, + const DominatorTree &DT) const { + PHITransAddr Tmp(V, TD); + Tmp.PHITranslateValue(CurBB, PredBB); + + // See if PHI translation succeeds. + V = Tmp.getAddr(); + + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) + if (!DT.dominates(Inst->getParent(), PredBB)) + return 0; + return V; +} + + +/// PHITranslateWithInsertion - PHI translate this value into the specified +/// predecessor block, inserting a computation of the value if it is +/// unavailable. +/// +/// All newly created instructions are added to the NewInsts list. This +/// returns null on failure. +/// +Value *PHITransAddr:: +PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + unsigned NISize = NewInsts.size(); + + // Attempt to PHI translate with insertion. + Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); + + // If successful, return the new value. + if (Addr) return Addr; + + // If not, destroy any intermediate instructions inserted. + while (NewInsts.size() != NISize) + NewInsts.pop_back_val()->eraseFromParent(); + return 0; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. All newly created instructions are added to the NewInsts list. +/// This returns null on failure. +/// +Value *PHITransAddr:: +InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new instance of it. + if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT)) + return Res; + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast<Instruction>(InVal); + + // Handle bitcast of PHI translatable value. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { + Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a bitcast at the end of PredBB. + BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index e767891..8148429 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -35,6 +35,7 @@ namespace { LoopInfo *LI; std::set<BasicBlock*> BBToVisit; std::map<Loop*,double> LoopExitWeights; + std::map<Edge,double> MinimalWeight; public: static char ID; // Class identification, replacement for typeinfo explicit ProfileEstimatorPass(const double execcount = 0) @@ -91,7 +92,7 @@ static void inline printEdgeError(ProfileInfo::Edge e, const char *M) { void inline ProfileEstimatorPass::printEdgeWeight(Edge E) { DEBUG(errs() << "-- Weight of Edge " << E << ":" - << format("%g", getEdgeWeight(E)) << "\n"); + << format("%20.20g", getEdgeWeight(E)) << "\n"); } // recurseBasicBlock() - This calculates the ProfileInfo estimation for a @@ -174,6 +175,12 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { double w = getEdgeWeight(*ei); if (w == MissingValue) { Edges.push_back(*ei); + // Check if there is a necessary minimal weight, if yes, subtract it + // from weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + incoming -= MinimalWeight[*ei]; + DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } } else { incoming -= w; } @@ -191,11 +198,43 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { printEdgeWeight(edge); } } - // Distribute remaining weight onto the exit edges. + + // Distribute remaining weight to the exting edges. To prevent fractions + // from building up and provoking precision problems the weight which is to + // be distributed is split and the rounded, the last edge gets a somewhat + // bigger value, but we are close enough for an estimation. + double fraction = floor(incoming/Edges.size()); for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { - EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size(); + double w = 0; + if (ei != (ee-1)) { + w = fraction; + incoming -= fraction; + } else { + w = incoming; + } + EdgeInformation[BB->getParent()][*ei] += w; + // Read necessary minimal weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } printEdgeWeight(*ei); + + // Add minimal weight to paths to all exit edges, this is used to ensure + // that enough flow is reaching this edges. + Path p; + const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest); + while (Dest != BB) { + const BasicBlock *Parent = p.find(Dest)->second; + Edge e = getEdge(Parent, Dest); + if (MinimalWeight.find(e) == MinimalWeight.end()) { + MinimalWeight[e] = 0; + } + MinimalWeight[e] += w; + DEBUG(errs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n"); + Dest = Parent; + } } // Increase flow into the loop. BBWeight *= (ExecCount+1); @@ -203,7 +242,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { BlockInformation[BB->getParent()][BB] = BBWeight; // Up until now we considered only the loop exiting edges, now we have a - // definite block weight and must ditribute this onto the outgoing edges. + // definite block weight and must distribute this onto the outgoing edges. // Since there may be already flow attached to some of the edges, read this // flow first and remember the edges that have still now flow attached. Edges.clear(); @@ -225,15 +264,32 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { BBWeight -= getEdgeWeight(edge); } else { Edges.push_back(edge); + // If minimal weight is necessary, reserve weight by subtracting weight + // from block weight, this is readded later on. + if (MinimalWeight.find(edge) != MinimalWeight.end()) { + BBWeight -= MinimalWeight[edge]; + DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n"); + } } } } + double fraction = floor(BBWeight/Edges.size()); // Finally we know what flow is still not leaving the block, distribute this // flow onto the empty edges. for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { - EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size(); + if (ei != (ee-1)) { + EdgeInformation[BB->getParent()][*ei] += fraction; + BBWeight -= fraction; + } else { + EdgeInformation[BB->getParent()][*ei] += BBWeight; + } + // Readd minial necessary weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } printEdgeWeight(*ei); } @@ -260,20 +316,24 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) BBToVisit.insert(bi); + // Clear Minimal Edges. + MinimalWeight.clear(); + DEBUG(errs() << "Working on function " << F.getNameStr() << "\n"); // Since the entry block is the first one and has no predecessors, the edge // (0,entry) is inserted with the starting weight of 1. BasicBlock *entry = &F.getEntryBlock(); - BlockInformation[&F][entry] = 1; + BlockInformation[&F][entry] = pow(2.0, 32.0); Edge edge = getEdge(0,entry); - EdgeInformation[&F][edge] = 1; + EdgeInformation[&F][edge] = BlockInformation[&F][entry]; printEdgeWeight(edge); // Since recurseBasicBlock() maybe returns with a block which was not fully - // estimated, use recurseBasicBlock() until everything is calculated. + // estimated, use recurseBasicBlock() until everything is calculated. + bool cleanup = false; recurseBasicBlock(entry); - while (BBToVisit.size() > 0) { + while (BBToVisit.size() > 0 && !cleanup) { // Remember number of open blocks, this is later used to check if progress // was made. unsigned size = BBToVisit.size(); @@ -287,21 +347,65 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { if (BBToVisit.size() < size) break; } - // If there was not a single block resovled, make some assumptions. + // If there was not a single block resolved, make some assumptions. if (BBToVisit.size() == size) { - BasicBlock *BB = *(BBToVisit.begin()); - // Since this BB was not calculated because of missing incoming edges, - // set these edges to zero. - for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi) { - Edge e = getEdge(*bbi,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - EdgeInformation[&F][e] = 0; - DEBUG(errs() << "Assuming edge weight: "); - printEdgeWeight(e); + bool found = false; + for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end(); + (BBI != BBE) && (!found); ++BBI) { + BasicBlock *BB = *BBI; + // Try each predecessor if it can be assumend. + for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + (bbi != bbe) && (!found); ++bbi) { + Edge e = getEdge(*bbi,BB); + double w = getEdgeWeight(e); + // Check that edge from predecessor is still free. + if (w == MissingValue) { + // Check if there is a circle from this block to predecessor. + Path P; + const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest); + if (Dest != *bbi) { + // If there is no circle, just set edge weight to 0 + EdgeInformation[&F][e] = 0; + DEBUG(errs() << "Assuming edge weight: "); + printEdgeWeight(e); + found = true; + } + } } } + if (!found) { + cleanup = true; + DEBUG(errs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n"); + } + } + } + // In case there was no safe way to assume edges, set as a last measure, + // set _everything_ to zero. + if (cleanup) { + FunctionInformation[&F] = 0; + BlockInformation[&F].clear(); + EdgeInformation[&F].clear(); + for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + const BasicBlock *BB = &(*FI); + BlockInformation[&F][BB] = 0; + pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB); + if (predi == prede) { + Edge e = getEdge(0,BB); + setEdgeWeight(e,0); + } + for (;predi != prede; ++predi) { + Edge e = getEdge(*predi,BB); + setEdgeWeight(e,0); + } + succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB); + if (succi == succe) { + Edge e = getEdge(BB,0); + setEdgeWeight(e,0); + } + for (;succi != succe; ++succi) { + Edge e = getEdge(*succi,BB); + setEdgeWeight(e,0); + } } } diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 7f24f5a..c49c6e1 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -11,26 +11,52 @@ // "no profile" implementation. // //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "profile-info" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Format.h" +#include "llvm/ADT/SmallSet.h" #include <set> +#include <queue> +#include <limits> using namespace llvm; // Register the ProfileInfo interface, providing a nice name to refer to. static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information"); -char ProfileInfo::ID = 0; -ProfileInfo::~ProfileInfo() {} +namespace llvm { + +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {} +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {} + +template <> +ProfileInfoT<Function, BasicBlock>::ProfileInfoT() { + MachineProfile = 0; +} +template <> +ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { + if (MachineProfile) delete MachineProfile; +} + +template<> +char ProfileInfoT<Function,BasicBlock>::ID = 0; + +template<> +char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; + +template<> +const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; -const double ProfileInfo::MissingValue = -1; +template<> const +double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; -double ProfileInfo::getExecutionCount(const BasicBlock *BB) { +template<> double +ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { std::map<const Function*, BlockCounts>::iterator J = BlockInformation.find(BB->getParent()); if (J != BlockInformation.end()) { @@ -39,36 +65,74 @@ double ProfileInfo::getExecutionCount(const BasicBlock *BB) { return I->second; } + double Count = MissingValue; + pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); // Are there zero predecessors of this block? if (PI == PE) { - // If this is the entry block, look for the Null -> Entry edge. - if (BB == &BB->getParent()->getEntryBlock()) - return getEdgeWeight(getEdge(0, BB)); - else - return 0; // Otherwise, this is a dead block. + Edge e = getEdge(0,BB); + Count = getEdgeWeight(e); + } else { + // Otherwise, if there are predecessors, the execution count of this block is + // the sum of the edge frequencies from the incoming edges. + std::set<const BasicBlock*> ProcessedPreds; + Count = 0; + for (; PI != PE; ++PI) + if (ProcessedPreds.insert(*PI).second) { + double w = getEdgeWeight(getEdge(*PI, BB)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } } - // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. - std::set<const BasicBlock*> ProcessedPreds; - double Count = 0; - for (; PI != PE; ++PI) - if (ProcessedPreds.insert(*PI).second) { - double w = getEdgeWeight(getEdge(*PI, BB)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; + // If the predecessors did not suffice to get block weight, try successors. + if (Count == MissingValue) { + + succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + + // Are there zero successors of this block? + if (SI == SE) { + Edge e = getEdge(BB,0); + Count = getEdgeWeight(e); + } else { + std::set<const BasicBlock*> ProcessedSuccs; + Count = 0; + for (; SI != SE; ++SI) + if (ProcessedSuccs.insert(*SI).second) { + double w = getEdgeWeight(getEdge(BB, *SI)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } } + } if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; return Count; } -double ProfileInfo::getExecutionCount(const Function *F) { +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineBasicBlock *MBB) { + std::map<const MachineFunction*, BlockCounts>::iterator J = + BlockInformation.find(MBB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(MBB); + if (I != J->second.end()) + return I->second; + } + + return MissingValue; +} + +template<> +double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) { std::map<const Function*, double>::iterator J = FunctionInformation.find(F); if (J != FunctionInformation.end()) @@ -83,35 +147,211 @@ double ProfileInfo::getExecutionCount(const Function *F) { return Count; } +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineFunction *MF) { + std::map<const MachineFunction*, double>::iterator J = + FunctionInformation.find(MF); + if (J != FunctionInformation.end()) + return J->second; + + double Count = getExecutionCount(&MF->front()); + if (Count != MissingValue) FunctionInformation[MF] = Count; + return Count; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + setExecutionCount(const BasicBlock *BB, double w) { + DEBUG(errs() << "Creating Block " << BB->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[BB->getParent()][BB] = w; +} + +template<> +void ProfileInfoT<MachineFunction, MachineBasicBlock>:: + setExecutionCount(const MachineBasicBlock *MBB, double w) { + DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[MBB->getParent()][MBB] = w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) { + double oldw = getEdgeWeight(e); + assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); + DEBUG(errs() << "Adding to Edge " << e + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + EdgeInformation[getFunction(e)][e] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + addExecutionCount(const BasicBlock *BB, double w) { + double oldw = getExecutionCount(BB); + assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); + DEBUG(errs() << "Adding to Block " << BB->getName() + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + BlockInformation[BB->getParent()][BB] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J == BlockInformation.end()) return; + + DEBUG(errs() << "Deleting " << BB->getName() << "\n"); + J->second.erase(BB); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) { + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(getFunction(e)); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Deleting" << e << "\n"); + J->second.erase(e); +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceEdge(const Edge &oldedge, const Edge &newedge) { + double w; + if ((w = getEdgeWeight(newedge)) == MissingValue) { + w = getEdgeWeight(oldedge); + DEBUG(errs() << "Replacing " << oldedge << " with " << newedge << "\n"); + } else { + w += getEdgeWeight(oldedge); + DEBUG(errs() << "Adding " << oldedge << " to " << newedge << "\n"); + } + setEdgeWeight(newedge,w); + removeEdge(oldedge); +} + +template<> +const BasicBlock *ProfileInfoT<Function,BasicBlock>:: + GetPath(const BasicBlock *Src, const BasicBlock *Dest, + Path &P, unsigned Mode) { + const BasicBlock *BB = 0; + bool hasFoundPath = false; + + std::queue<const BasicBlock *> BFS; + BFS.push(Src); + + while(BFS.size() && !hasFoundPath) { + BB = BFS.front(); + BFS.pop(); + + succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + if (Succ == End) { + P[0] = BB; + if (Mode & GetPathToExit) { + hasFoundPath = true; + BB = 0; + } + } + for(;Succ != End; ++Succ) { + if (P.find(*Succ) != P.end()) continue; + Edge e = getEdge(BB,*Succ); + if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue; + P[*Succ] = BB; + BFS.push(*Succ); + if ((Mode & GetPathToDest) && *Succ == Dest) { + hasFoundPath = true; + BB = *Succ; + break; + } + if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) { + hasFoundPath = true; + BB = *Succ; + break; + } + } + } + + return BB; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + divertFlow(const Edge &oldedge, const Edge &newedge) { + DEBUG(errs() << "Diverting " << oldedge << " via " << newedge ); + + // First check if the old edge was taken, if not, just delete it... + if (getEdgeWeight(oldedge) == 0) { + removeEdge(oldedge); + return; + } + + Path P; + P[newedge.first] = 0; + P[newedge.second] = newedge.first; + const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest); + + double w = getEdgeWeight (oldedge); + DEBUG(errs() << ", Weight: " << format("%.20g",w) << "\n"); + do { + const BasicBlock *Parent = P.find(BB)->second; + Edge e = getEdge(Parent,BB); + double oldw = getEdgeWeight(e); + double oldc = getExecutionCount(e.first); + setEdgeWeight(e, w+oldw); + if (Parent != oldedge.first) { + setExecutionCount(e.first, w+oldc); + } + BB = Parent; + } while (BB != newedge.first); + removeEdge(oldedge); +} + /// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. /// This checks all edges of the function the blocks reside in and replaces the /// occurences of RmBB with DestBB. -void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, - const BasicBlock *DestBB) { - DEBUG(errs() << "Replacing " << RmBB->getNameStr() - << " with " << DestBB->getNameStr() << "\n"); +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { + DEBUG(errs() << "Replacing " << RmBB->getName() + << " with " << DestBB->getName() << "\n"); const Function *F = DestBB->getParent(); std::map<const Function*, EdgeWeights>::iterator J = EdgeInformation.find(F); if (J == EdgeInformation.end()) return; - for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end(); - I != E; ++I) { - Edge e = I->first; - Edge newedge; bool foundedge = false; + Edge e, newedge; + bool erasededge = false; + EdgeWeights::iterator I = J->second.begin(), E = J->second.end(); + while(I != E) { + e = (I++)->first; + bool foundedge = false; bool eraseedge = false; if (e.first == RmBB) { - newedge = getEdge(DestBB, e.second); - foundedge = true; + if (e.second == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(DestBB, e.second); + foundedge = true; + } } if (e.second == RmBB) { - newedge = getEdge(e.first, DestBB); - foundedge = true; + if (e.first == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(e.first, DestBB); + foundedge = true; + } } if (foundedge) { - double w = getEdgeWeight(e); - EdgeInformation[F][newedge] = w; - DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n"); - J->second.erase(e); + replaceEdge(e, newedge); + } + if (eraseedge) { + if (erasededge) { + Edge newedge = getEdge(DestBB, DestBB); + replaceEdge(e, newedge); + } else { + removeEdge(e); + erasededge = true; + } } } } @@ -119,10 +359,11 @@ void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, /// Splits an edge in the ProfileInfo and redirects flow over NewBB. /// Since its possible that there is more than one edge in the CFG from FristBB /// to SecondBB its necessary to redirect the flow proporionally. -void ProfileInfo::splitEdge(const BasicBlock *FirstBB, - const BasicBlock *SecondBB, - const BasicBlock *NewBB, - bool MergeIdenticalEdges) { +template<> +void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB, + const BasicBlock *SecondBB, + const BasicBlock *NewBB, + bool MergeIdenticalEdges) { const Function *F = FirstBB->getParent(); std::map<const Function*, EdgeWeights>::iterator J = EdgeInformation.find(F); @@ -153,7 +394,7 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB, // We know now how many edges there are from FirstBB to SecondBB, reroute a // proportional part of the edge weight over NewBB. - double neww = w / succ_count; + double neww = floor(w / succ_count); ECs[n1] += neww; ECs[n2] += neww; BlockInformation[F][NewBB] += neww; @@ -164,14 +405,672 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB, } } -raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) { +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old, + const BasicBlock* New) { + const Function *F = Old->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n"); + + std::set<Edge> Edges; + for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end(); + ewi != ewe; ++ewi) { + Edge old = ewi->first; + if (old.first == Old) { + Edges.insert(old); + } + } + for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end(); + EI != EE; ++EI) { + Edge newedge = getEdge(New, EI->second); + replaceEdge(*EI, newedge); + } + + double w = getExecutionCount(Old); + setEdgeWeight(getEdge(Old, New), w); + setExecutionCount(New, w); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB, + const BasicBlock* NewBB, + BasicBlock *const *Preds, + unsigned NumPreds) { + const Function *F = BB->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Splitting " << NumPreds << " Edges from " << BB->getName() + << " to " << NewBB->getName() << "\n"); + + // Collect weight that was redirected over NewBB. + double newweight = 0; + + std::set<const BasicBlock *> ProcessedPreds; + // For all requestes Predecessors. + for (unsigned pred = 0; pred < NumPreds; ++pred) { + const BasicBlock * Pred = Preds[pred]; + if (ProcessedPreds.insert(Pred).second) { + // Create edges and read old weight. + Edge oldedge = getEdge(Pred, BB); + Edge newedge = getEdge(Pred, NewBB); + + // Remember how much weight was redirected. + newweight += getEdgeWeight(oldedge); + + replaceEdge(oldedge,newedge); + } + } + + Edge newedge = getEdge(NewBB,BB); + setEdgeWeight(newedge, newweight); + setExecutionCount(NewBB, newweight); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old, + const Function *New) { + DEBUG(errs() << "Replacing Function " << Old->getName() << " with " + << New->getName() << "\n"); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(Old); + if(J != EdgeInformation.end()) { + EdgeInformation[New] = J->second; + } + EdgeInformation.erase(Old); + BlockInformation.erase(Old); + FunctionInformation.erase(Old); +} + +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, + ProfileInfo::Edge &tocalc, unsigned &uncalc) { + if (w == ProfileInfo::MissingValue) { + tocalc = edge; + uncalc++; + return 0; + } else { + return w; + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>:: + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, + bool assumeEmptySelf) { + Edge edgetocalc; + unsigned uncalculated = 0; + + // collect weights of all incoming and outgoing edges, rememer edges that + // have no value + double incount = 0; + SmallSet<const BasicBlock*,8> pred_visited; + pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi==bbe) { + Edge e = getEdge(0,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + for (;bbi != bbe; ++bbi) { + if (pred_visited.insert(*bbi)) { + Edge e = getEdge(*bbi,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + } + + double outcount = 0; + SmallSet<const BasicBlock*,8> succ_visited; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi==sbbe) { + Edge e = getEdge(BB,0); + if (getEdgeWeight(e) == MissingValue) { + double w = getExecutionCount(BB); + if (w != MissingValue) { + setEdgeWeight(e,w); + removed = e; + } + } + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + for (;sbbi != sbbe; ++sbbi) { + if (succ_visited.insert(*sbbi)) { + Edge e = getEdge(BB,*sbbi); + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + } + + // if exactly one edge weight was missing, calculate it and remove it from + // spanning tree + if (uncalculated == 0 ) { + return true; + } else + if (uncalculated == 1) { + if (incount < outcount) { + EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; + } else { + EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; + } + DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": " + << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); + removed = edgetocalc; + return true; + } else + if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { + setEdgeWeight(edgetocalc, incount * 10); + removed = edgetocalc; + return true; + } else { + return false; + } +} + +static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) { + double w = PI->getEdgeWeight(e); + if (w != ProfileInfo::MissingValue) { + calcw += w; + } else { + misscount.insert(e); + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { + bool hasNoSuccessors = false; + + double inWeight = 0; + std::set<Edge> inMissing; + std::set<const BasicBlock*> ProcessedPreds; + pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi == bbe) { + readEdge(this,getEdge(0,BB),inWeight,inMissing); + } + for( ; bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); + } + } + + double outWeight = 0; + std::set<Edge> outMissing; + std::set<const BasicBlock*> ProcessedSuccs; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi == sbbe) { + readEdge(this,getEdge(BB,0),outWeight,outMissing); + hasNoSuccessors = true; + } + for ( ; sbbi != sbbe; ++sbbi ) { + if (ProcessedSuccs.insert(*sbbi).second) { + readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); + } + } + + double share; + std::set<Edge>::iterator ei,ee; + if (inMissing.size() == 0 && outMissing.size() > 0) { + ei = outMissing.begin(); + ee = outMissing.end(); + share = inWeight/outMissing.size(); + setExecutionCount(BB,inWeight); + } else + if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) { + ei = inMissing.begin(); + ee = inMissing.end(); + share = 0; + setExecutionCount(BB,0); + } else + if (inMissing.size() == 0 && outMissing.size() == 0) { + setExecutionCount(BB,outWeight); + return true; + } else { + return false; + } + for ( ; ei != ee; ++ei ) { + setEdgeWeight(*ei,share); + } + return true; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { +// if (getExecutionCount(&(F->getEntryBlock())) == 0) { +// for (Function::const_iterator FI = F->begin(), FE = F->end(); +// FI != FE; ++FI) { +// const BasicBlock* BB = &(*FI); +// { +// pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// { +// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// } +// return; +// } + // The set of BasicBlocks that are still unvisited. + std::set<const BasicBlock*> Unvisited; + + // The set of return edges (Edges with no successors). + std::set<Edge> ReturnEdges; + double ReturnWeight = 0; + + // First iterate over the whole function and collect: + // 1) The blocks in this function in the Unvisited set. + // 2) The return edges in the ReturnEdges set. + // 3) The flow that is leaving the function already via return edges. + + // Data structure for searching the function. + std::queue<const BasicBlock *> BFS; + const BasicBlock *BB = &(F->getEntryBlock()); + BFS.push(BB); + Unvisited.insert(BB); + + while (BFS.size()) { + BB = BFS.front(); BFS.pop(); + succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + if (NBB == End) { + Edge e = getEdge(BB,0); + double w = getEdgeWeight(e); + if (w == MissingValue) { + // If the return edge has no value, try to read value from block. + double bw = getExecutionCount(BB); + if (bw != MissingValue) { + setEdgeWeight(e,bw); + ReturnWeight += bw; + } else { + // If both return edge and block provide no value, collect edge. + ReturnEdges.insert(e); + } + } else { + // If the return edge has a proper value, collect it. + ReturnWeight += w; + } + } + for (;NBB != End; ++NBB) { + if (Unvisited.insert(*NBB).second) { + BFS.push(*NBB); + } + } + } + + while (Unvisited.size() > 0) { + unsigned oldUnvisitedCount = Unvisited.size(); + bool FoundPath = false; + + // If there is only one edge left, calculate it. + if (ReturnEdges.size() == 1) { + ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight; + + Edge e = *ReturnEdges.begin(); + setEdgeWeight(e,ReturnWeight); + setExecutionCount(e.first,ReturnWeight); + + Unvisited.erase(e.first); + ReturnEdges.erase(e); + continue; + } + + // Calculate all blocks where only one edge is missing, this may also + // resolve furhter return edges. + std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + Edge e; + if(CalculateMissingEdge(BB,e,true)) { + if (BlockInformation[F].find(BB) == BlockInformation[F].end()) { + setExecutionCount(BB,getExecutionCount(BB)); + } + Unvisited.erase(BB); + if (e.first != 0 && e.second == 0) { + ReturnEdges.erase(e); + ReturnWeight += getEdgeWeight(e); + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Estimate edge weights by dividing the flow proportionally. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + bool AllEdgesHaveSameReturn = true; + // Check each Successor, these must all end up in the same or an empty + // return block otherwise its dangerous to do an estimation on them. + for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + Succ != End; ++Succ) { + Path P; + GetPath(*Succ, 0, P, GetPathToExit); + if (Dest && Dest != P[0]) { + AllEdgesHaveSameReturn = false; + } + Dest = P[0]; + } + if (AllEdgesHaveSameReturn) { + if(EstimateMissingEdges(BB)) { + Unvisited.erase(BB); + break; + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Check if there is a path to an block that has a known value and redirect + // flow accordingly. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + // Fetch path. + const BasicBlock *BB = *FI; ++FI; + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue); + + // Calculate incoming flow. + double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + invalid++; + } else { + // If the path contains the successor, this means its a backedge, + // do not count as missing. + if (P.find(*NBB) == P.end()) + inmissing++; + } + incount++; + } + } + if (inmissing == incount) continue; + if (invalid == 0) continue; + + // Subtract (already) outgoing flow. + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw -= ew; + } + } + } + if (iw < 0) continue; + + // Check the recieving end of the path if it can handle the flow. + double ow = getExecutionCount(Dest); + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + ow -= ew; + } + } + } + if (ow < 0) continue; + + // Determine how much flow shall be used. + double ew = getEdgeWeight(getEdge(P[Dest],Dest)); + if (ew != MissingValue) { + ew = ew<ow?ew:ow; + ew = ew<iw?ew:iw; + } else { + if (inmissing == 0) + ew = iw; + } + + // Create flow. + if (ew != MissingValue) { + do { + Edge e = getEdge(P[Dest],Dest); + if (getEdgeWeight(e) == MissingValue) { + setEdgeWeight(e,ew); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Calculate a block with self loop. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + bool SelfEdgeFound = false; + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (*NBB == BB) { + SelfEdgeFound = true; + break; + } + } + if (SelfEdgeFound) { + Edge e = getEdge(BB,BB); + if (getEdgeWeight(e) == MissingValue) { + double iw = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + setEdgeWeight(e,iw * 10); + FoundPath = true; + } + } + } + if (FoundPath) continue; + + // Determine backedges, set them to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest; + Path P; + bool BackEdgeFound = false; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); + if (Dest == *NBB) { + BackEdgeFound = true; + break; + } + } + if (BackEdgeFound) { + Edge e = getEdge(Dest,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Channel flow to return block. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); + Dest = P[0]; + if (!Dest) continue; + + if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { + // Calculate incoming flow. + double iw = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,iw); + FoundPath = true; + } else { + assert(0 && "Edge should not have value already!"); + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Speculatively set edges to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Edge e = getEdge(*NBB,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + break; + } + } + } + if (FoundPath) continue; + + errs() << "{"; + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + errs() << BB->getName(); + if (FI != FE) + errs() << ","; + } + errs() << "}"; + + errs() << "ASSERT: could not repair function"; + assert(0 && "could not repair function"); + } + + EdgeWeights J = EdgeInformation[F]; + for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { + Edge e = EI->first; + + bool SuccFound = false; + if (e.first != 0) { + succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); + if (NBB == End) { + if (0 == e.second) { + SuccFound = true; + } + } + for (;NBB != End; ++NBB) { + if (*NBB == e.second) { + SuccFound = true; + break; + } + } + if (!SuccFound) { + removeEdge(e); + } + } + } +} + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { + return O << MF->getFunction()->getName() << "(MF)"; +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { + return O << MBB->getBasicBlock()->getName() << "(MB)"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { O << "("; - O << (E.first ? E.first->getNameStr() : "0"); + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + O << ","; - O << (E.second ? E.second->getNameStr() : "0"); + + if (E.second) + O << E.second; + else + O << "0"; + return O << ")"; } +} // namespace llvm + //===----------------------------------------------------------------------===// // NoProfile ProfileInfo implementation // diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 9e1dfb6..cbd0430 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -74,6 +74,8 @@ X("profile-loader", "Load profile information from llvmprof.out", false, true); static RegisterAnalysisGroup<ProfileInfo> Y(X); +const PassInfo *llvm::ProfileLoaderPassID = &X; + ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } /// createProfileLoaderPass - This function returns a Pass that loads the @@ -112,46 +114,9 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { recurseBasicBlock(*bbi); } - Edge edgetocalc; - unsigned uncalculated = 0; - - // collect weights of all incoming and outgoing edges, rememer edges that - // have no value - double incount = 0; - SmallSet<const BasicBlock*,8> pred_visited; - pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi==bbe) { - readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount); - } - for (;bbi != bbe; ++bbi) { - if (pred_visited.insert(*bbi)) { - readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount); - } - } - - double outcount = 0; - SmallSet<const BasicBlock*,8> succ_visited; - succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi==sbbe) { - readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount); - } - for (;sbbi != sbbe; ++sbbi) { - if (succ_visited.insert(*sbbi)) { - readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount); - } - } - - // if exactly one edge weight was missing, calculate it and remove it from - // spanning tree - if (uncalculated == 1) { - if (incount < outcount) { - EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; - } else { - EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; - } - DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": " - << format("%g", getEdgeWeight(edgetocalc)) << "\n"); - SpanningTree.erase(edgetocalc); + Edge tocalc; + if (CalculateMissingEdge(BB, tocalc)) { + SpanningTree.erase(tocalc); } } @@ -219,9 +184,9 @@ bool LoaderPass::runOnModule(Module &M) { } } while (SpanningTree.size() > 0) { -#if 0 + unsigned size = SpanningTree.size(); -#endif + BBisUnvisited.clear(); for (std::set<Edge>::iterator ei = SpanningTree.begin(), ee = SpanningTree.end(); ei != ee; ++ei) { @@ -231,17 +196,16 @@ bool LoaderPass::runOnModule(Module &M) { while (BBisUnvisited.size() > 0) { recurseBasicBlock(*BBisUnvisited.begin()); } -#if 0 + if (SpanningTree.size() == size) { DEBUG(errs()<<"{"); for (std::set<Edge>::iterator ei = SpanningTree.begin(), ee = SpanningTree.end(); ei != ee; ++ei) { - DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<"," - <<(ei->second?ei->second->getName():"0")<<"),"); + DEBUG(errs()<< *ei <<","); } assert(0 && "No edge calculated!"); } -#endif + } } if (ReadCount != Counters.size()) { diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 5f36294..36a80ba 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include "llvm/Support/Debug.h" #include <set> using namespace llvm; @@ -29,44 +30,45 @@ static cl::opt<bool,false> ProfileVerifierDisableAssertions("profile-verifier-noassert", cl::desc("Disable assertions")); -namespace { - class ProfileVerifierPass : public FunctionPass { +namespace llvm { + template<class FType, class BType> + class ProfileVerifierPassT : public FunctionPass { struct DetailedBlockInfo { - const BasicBlock *BB; - double BBWeight; - double inWeight; - int inCount; - double outWeight; - int outCount; + const BType *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; }; - ProfileInfo *PI; - std::set<const BasicBlock*> BBisVisited; - std::set<const Function*> FisVisited; + ProfileInfoT<FType, BType> *PI; + std::set<const BType*> BBisVisited; + std::set<const FType*> FisVisited; bool DisableAssertions; // When debugging is enabled, the verifier prints a whole slew of debug // information, otherwise its just the assert. These are all the helper // functions. bool PrintedDebugTree; - std::set<const BasicBlock*> BBisPrinted; + std::set<const BType*> BBisPrinted; void debugEntry(DetailedBlockInfo*); - void printDebugInfo(const BasicBlock *BB); + void printDebugInfo(const BType *BB); public: static char ID; // Class identification, replacement for typeinfo - explicit ProfileVerifierPass () : FunctionPass(&ID) { + explicit ProfileVerifierPassT () : FunctionPass(&ID) { DisableAssertions = ProfileVerifierDisableAssertions; } - explicit ProfileVerifierPass (bool da) : FunctionPass(&ID), - DisableAssertions(da) { + explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID), + DisableAssertions(da) { } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<ProfileInfo>(); + AU.addRequired<ProfileInfoT<FType, BType> >(); } const char *getPassName() const { @@ -74,271 +76,302 @@ namespace { } /// run - Verify the profile information. - bool runOnFunction(Function &F); - void recurseBasicBlock(const BasicBlock*); + bool runOnFunction(FType &F); + void recurseBasicBlock(const BType*); - bool exitReachable(const Function*); - double ReadOrAssert(ProfileInfo::Edge); + bool exitReachable(const FType*); + double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge); void CheckValue(bool, const char*, DetailedBlockInfo*); }; -} // End of anonymous namespace - -char ProfileVerifierPass::ID = 0; -static RegisterPass<ProfileVerifierPass> -X("profile-verifier", "Verify profiling information", false, true); -namespace llvm { - FunctionPass *createProfileVerifierPass() { - return new ProfileVerifierPass(ProfileVerifierDisableAssertions); - } -} - -void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) { - - if (BBisPrinted.find(BB) != BBisPrinted.end()) return; - - double BBWeight = PI->getExecutionCount(BB); - if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; } - double inWeight = 0; - int inCount = 0; - std::set<const BasicBlock*> ProcessedPreds; - for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - ProfileInfo::Edge E = PI->getEdge(*bbi,BB); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } - errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n"; - inWeight += EdgeWeight; - inCount++; + typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass; + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) { + + if (BBisPrinted.find(BB) != BBisPrinted.end()) return; + + double BBWeight = PI->getExecutionCount(BB); + if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; } + double inWeight = 0; + int inCount = 0; + std::set<const BType*> ProcessedPreds; + for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + errs() << "calculated in-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + inWeight += EdgeWeight; + inCount++; + } } - } - double outWeight = 0; - int outCount = 0; - std::set<const BasicBlock*> ProcessedSuccs; - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedSuccs.insert(*bbi).second) { - ProfileInfo::Edge E = PI->getEdge(BB,*bbi); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } - errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n"; - outWeight += EdgeWeight; - outCount++; + double outWeight = 0; + int outCount = 0; + std::set<const BType*> ProcessedSuccs; + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + errs() << "calculated out-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + outWeight += EdgeWeight; + outCount++; + } + } + errs() << "Block " << BB->getNameStr() << " in " + << BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",BBWeight) << "," + << "inWeight=" << format("%20.20g",inWeight) << "," + << "inCount=" << inCount << "," + << "outWeight=" << format("%20.20g",outWeight) << "," + << "outCount" << outCount << "\n"; + + // mark as visited and recurse into subnodes + BBisPrinted.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + printDebugInfo(*bbi); } } - errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr() - <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount - <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n"; - - // mark as visited and recurse into subnodes - BBisPrinted.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - printDebugInfo(*bbi); - } -} -void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) { - errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " - << DI->BB->getParent()->getNameStr() << ":"; - errs() << "BBWeight=" << DI->BBWeight << ","; - errs() << "inWeight=" << DI->inWeight << ","; - errs() << "inCount=" << DI->inCount << ","; - errs() << "outWeight=" << DI->outWeight << ","; - errs() << "outCount=" << DI->outCount << "\n"; - if (!PrintedDebugTree) { - PrintedDebugTree = true; - printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { + errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " + << DI->BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," + << "inWeight=" << format("%20.20g",DI->inWeight) << "," + << "inCount=" << DI->inCount << "," + << "outWeight=" << format("%20.20g",DI->outWeight) << "," + << "outCount=" << DI->outCount << "\n"; + if (!PrintedDebugTree) { + PrintedDebugTree = true; + printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + } } -} -// This compares A and B but considering maybe small differences. -static bool Equals(double A, double B) { - double maxRelativeError = 0.0000001; - if (A == B) - return true; - double relativeError; - if (fabs(B) > fabs(A)) - relativeError = fabs((A - B) / B); - else - relativeError = fabs((A - B) / A); - if (relativeError <= maxRelativeError) return true; - return false; -} + // This compares A and B for equality. + static bool Equals(double A, double B) { + return A == B; + } -// This checks if the function "exit" is reachable from an given function -// via calls, this is necessary to check if a profile is valid despite the -// counts not fitting exactly. -bool ProfileVerifierPass::exitReachable(const Function *F) { - if (!F) return false; + // This checks if the function "exit" is reachable from an given function + // via calls, this is necessary to check if a profile is valid despite the + // counts not fitting exactly. + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) { + if (!F) return false; - if (FisVisited.count(F)) return false; + if (FisVisited.count(F)) return false; - Function *Exit = F->getParent()->getFunction("exit"); - if (Exit == F) { - return true; - } + FType *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } - FisVisited.insert(F); - bool exits = false; - for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { - exits |= exitReachable(CI->getCalledFunction()); - if (exits) break; + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { + FType *F = CI->getCalledFunction(); + if (F) { + exits |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + exits = true; + } + if (exits) break; + } } + return exits; } - return exits; -} -#define ASSERTMESSAGE(M) \ - errs() << (M) << "\n"; \ - if (!DisableAssertions) assert(0 && (M)); - -double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) { - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { - errs() << "Edge " << E << " in Function " - << ProfileInfo::getFunction(E)->getNameStr() << ": "; - ASSERTMESSAGE("ASSERT:Edge has missing value"); - return 0; - } else { - return EdgeWeight; + #define ASSERTMESSAGE(M) \ + { errs() << "ASSERT:" << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); } + + template<class FType, class BType> + double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { + errs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has missing value"); + return 0; + } else { + if (EdgeWeight < 0) { + errs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has negative value"); + } + return EdgeWeight; + } } -} -void ProfileVerifierPass::CheckValue(bool Error, const char *Message, - DetailedBlockInfo *DI) { - if (Error) { - DEBUG(debugEntry(DI)); - errs() << "Block " << DI->BB->getNameStr() << " in Function " - << DI->BB->getParent()->getNameStr() << ": "; - ASSERTMESSAGE(Message); + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, + const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + errs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; } - return; -} -// This calculates the Information for a block and then recurses into the -// successors. -void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) { - - // Break the recursion by remembering all visited blocks. - if (BBisVisited.find(BB) != BBisVisited.end()) return; - - // Use a data structure to store all the information, this can then be handed - // to debug printers. - DetailedBlockInfo DI; - DI.BB = BB; - DI.outCount = DI.inCount = 0; - DI.inWeight = DI.outWeight = 0.0; - - // Read predecessors. - std::set<const BasicBlock*> ProcessedPreds; - pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); - // If there are none, check for (0,BB) edge. - if (bpi == bpe) { - DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); - DI.inCount++; - } - for (;bpi != bpe; ++bpi) { - if (ProcessedPreds.insert(*bpi).second) { - DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + // This calculates the Information for a block and then recurses into the + // successors. + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = 0; + DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set<const BType*> ProcessedPreds; + pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); DI.inCount++; } - } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } - // Read successors. - std::set<const BasicBlock*> ProcessedSuccs; - succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - // If there is an (0,BB) edge, consider it too. (This is done not only when - // there are no successors, but every time; not every function contains - // return blocks with no successors (think loop latch as return block)). - double w = PI->getEdgeWeight(PI->getEdge(BB,0)); - if (w != ProfileInfo::MissingValue) { - DI.outWeight += w; - DI.outCount++; - } - for (;bbi != bbe; ++bbi) { - if (ProcessedSuccs.insert(*bbi).second) { - DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + // Read successors. + std::set<const BType*> ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). + double w = PI->getEdgeWeight(PI->getEdge(BB,0)); + if (w != ProfileInfoT<FType, BType>::MissingValue) { + DI.outWeight += w; DI.outCount++; } - } + for (;bbi != bbe; ++bbi) { + if (ProcessedSuccs.insert(*bbi).second) { + DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + DI.outCount++; + } + } - // Read block weight. - DI.BBWeight = PI->getExecutionCount(BB); - CheckValue(DI.BBWeight == ProfileInfo::MissingValue, - "ASSERT:BasicBlock has missing value", &DI); - - // Check if this block is a setjmp target. - bool isSetJmpTarget = false; - if (DI.outWeight > DI.inWeight) { - for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - Function *F = CI->getCalledFunction(); - if (F && (F->getNameStr() == "_setjmp")) { - isSetJmpTarget = true; break; + // Read block weight. + DI.BBWeight = PI->getExecutionCount(BB); + CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue, + "BasicBlock has missing value", &DI); + CheckValue(DI.BBWeight < 0, + "BasicBlock has negative value", &DI); + + // Check if this block is a setjmp target. + bool isSetJmpTarget = false; + if (DI.outWeight > DI.inWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F && (F->getNameStr() == "_setjmp")) { + isSetJmpTarget = true; break; + } } } } - } - // Check if this block is eventually reaching exit. - bool isExitReachable = false; - if (DI.inWeight > DI.outWeight) { - for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - FisVisited.clear(); - isExitReachable |= exitReachable(CI->getCalledFunction()); - if (isExitReachable) break; + // Check if this block is eventually reaching exit. + bool isExitReachable = false; + if (DI.inWeight > DI.outWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F) { + FisVisited.clear(); + isExitReachable |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + isExitReachable = true; + } + if (isExitReachable) break; + } } } - } - if (DI.inCount > 0 && DI.outCount == 0) { - // If this is a block with no successors. - if (!isSetJmpTarget) { - CheckValue(!Equals(DI.inWeight,DI.BBWeight), - "ASSERT:inWeight and BBWeight do not match", &DI); + if (DI.inCount > 0 && DI.outCount == 0) { + // If this is a block with no successors. + if (!isSetJmpTarget) { + CheckValue(!Equals(DI.inWeight,DI.BBWeight), + "inWeight and BBWeight do not match", &DI); + } + } else if (DI.inCount == 0 && DI.outCount > 0) { + // If this is a block with no predecessors. + if (!isExitReachable) + CheckValue(!Equals(DI.BBWeight,DI.outWeight), + "BBWeight and outWeight do not match", &DI); + } else { + // If this block has successors and predecessors. + if (DI.inWeight > DI.outWeight && !isExitReachable) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + if (DI.inWeight < DI.outWeight && !isSetJmpTarget) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); } - } else if (DI.inCount == 0 && DI.outCount > 0) { - // If this is a block with no predecessors. - if (!isExitReachable) - CheckValue(!Equals(DI.BBWeight,DI.outWeight), - "ASSERT:BBWeight and outWeight do not match", &DI); - } else { - // If this block has successors and predecessors. - if (DI.inWeight > DI.outWeight && !isExitReachable) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "ASSERT:inWeight and outWeight do not match", &DI); - if (DI.inWeight < DI.outWeight && !isSetJmpTarget) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "ASSERT:inWeight and outWeight do not match", &DI); - } - // Mark this block as visited, rescurse into successors. - BBisVisited.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - recurseBasicBlock(*bbi); + // Mark this block as visited, rescurse into successors. + BBisVisited.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + recurseBasicBlock(*bbi); + } } -} -bool ProfileVerifierPass::runOnFunction(Function &F) { - PI = &getAnalysis<ProfileInfo>(); + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) { + PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >(); + if (!PI) + ASSERTMESSAGE("No ProfileInfo available"); + + // Prepare global variables. + PrintedDebugTree = false; + BBisVisited.clear(); + + // Fetch entry block and recurse into it. + const BType *entry = &F.getEntryBlock(); + recurseBasicBlock(entry); + + if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) + ASSERTMESSAGE("Function count and entry block count do not match"); - // Prepare global variables. - PrintedDebugTree = false; - BBisVisited.clear(); + return false; + } + + template<class FType, class BType> + char ProfileVerifierPassT<FType, BType>::ID = 0; +} - // Fetch entry block and recurse into it. - const BasicBlock *entry = &F.getEntryBlock(); - recurseBasicBlock(entry); +static RegisterPass<ProfileVerifierPass> +X("profile-verifier", "Verify profiling information", false, true); - if (!DisableAssertions) - assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) && - "Function count and entry block count do not match"); - return false; +namespace llvm { + FunctionPass *createProfileVerifierPass() { + return new ProfileVerifierPass(ProfileVerifierDisableAssertions); + } } + diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d674ee8..7157d47 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -357,7 +357,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - // Decend down the pointer's type and attempt to convert the other + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of // the indices index into the element or field type selected by the @@ -628,7 +628,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = - next(BasicBlock::iterator(cast<Instruction>(V))); + llvm::next(BasicBlock::iterator(cast<Instruction>(V))); while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); @@ -844,7 +844,7 @@ Value *SCEVExpander::expand(const SCEV *S) { if (L && S->hasComputableLoopEvolution(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt)) - InsertPt = next(BasicBlock::iterator(InsertPt)); + InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 31d3ccc..22c6e3b 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -659,7 +659,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: - Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth(); + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; case Instruction::AShr: |