Diffstat (limited to 'contrib/llvm/lib/Analysis')
41 files changed, 2793 insertions, 4706 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 210b80a..b8b6d37 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -361,24 +362,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { } namespace { - // Conservatively return true. Return false, if there is a single path - // starting from "From" and the path does not reach "To". - static bool hasPath(const BasicBlock *From, const BasicBlock *To) { - const unsigned MaxCheck = 5; - const BasicBlock *Current = From; - for (unsigned I = 0; I < MaxCheck; I++) { - unsigned NumSuccs = Current->getTerminator()->getNumSuccessors(); - if (NumSuccs > 1) - return true; - if (NumSuccs == 0) - return false; - Current = Current->getTerminator()->getSuccessor(0); - if (Current == To) - return true; - } - return true; - } - /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. /// Only support the case where the Value is defined in the same basic block @@ -400,7 +383,7 @@ namespace { // there is no need to explore the use if BeforeHere dominates use. // Check whether there is a path from I to BeforeHere. if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !hasPath(BB, BeforeHere->getParent())) + !isPotentiallyReachable(I, BeforeHere, DT)) return false; return true; } @@ -412,7 +395,7 @@ namespace { if (BeforeHere != I && !DT->isReachableFromEntry(BB)) return false; if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !hasPath(BB, BeforeHere->getParent())) + !isPotentiallyReachable(I, BeforeHere, DT)) return false; Captured = true; return true; @@ -450,6 +433,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I, return AliasAnalysis::ModRef; unsigned ArgNo = 0; + AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef; for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture or byval pointer arguments. If this @@ -463,12 +447,18 @@ AliasAnalysis::callCapturesBefore(const Instruction *I, // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't // escape. - if (!isNoAlias(AliasAnalysis::Location(*CI), - AliasAnalysis::Location(Object))) { - return AliasAnalysis::ModRef; + if (isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) + continue; + if (CS.doesNotAccessMemory(ArgNo)) + continue; + if (CS.onlyReadsMemory(ArgNo)) { + R = AliasAnalysis::Ref; + continue; } + return AliasAnalysis::ModRef; } - return AliasAnalysis::NoModRef; + return R; } // AliasAnalysis destructor: DO NOT move this to the header file for @@ -537,6 +527,15 @@ bool llvm::isNoAliasCall(const Value *V) { return false; } +/// isNoAliasArgument - Return true if this is an argument with the noalias +/// attribute. +bool llvm::isNoAliasArgument(const Value *V) +{ + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasNoAliasAttr(); + return false; +} + /// isIdentifiedObject - Return true if this pointer refers to a distinct and /// identifiable object. 
This returns true for: /// Global Variables and Functions (but not Global Aliases) diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 5910526..2289c12 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { bool AliasSetTracker::add(LoadInst *LI) { if (LI->getOrdering() > Monotonic) return addUnknown(LI); AliasSet::AccessType ATy = AliasSet::Refs; - if (!LI->isUnordered()) ATy = AliasSet::ModRef; bool NewPtr; AliasSet &AS = addPointer(LI->getOperand(0), AA.getTypeStoreSize(LI->getType()), @@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) { bool AliasSetTracker::add(StoreInst *SI) { if (SI->getOrdering() > Monotonic) return addUnknown(SI); AliasSet::AccessType ATy = AliasSet::Mods; - if (!SI->isUnordered()) ATy = AliasSet::ModRef; bool NewPtr; Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 349c417..98f2a55 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); initializeDependenceAnalysisPass(Registry); + initializeDelinearizationPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); @@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); - initializeProfileEstimatorPassPass(Registry); - initializeNoProfileInfoPass(Registry); - initializeNoPathProfileInfoPass(Registry); - initializeProfileInfoAnalysisGroup(Registry); - initializePathProfileInfoAnalysisGroup(Registry); - initializeLoaderPassPass(Registry); - initializePathProfileLoaderPassPass(Registry); - initializeProfileVerifierPassPass(Registry); - initializePathProfileVerifierPass(Registry); - initializeProfileMetadataLoaderPassPass(Registry); initializeRegionInfoPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index f8509dd..b2c2011 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, // question (in this case rewind to p), or // - just give up. It is up to the caller to make sure the pointer is pointing // to the base address of the object. - // + // // We go for the 2nd option for simplicity. if (!isIdentifiedObject(V)) return false; @@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size, // This function needs to use the aligned object size because we allow // reads a bit past the end given sufficient alignment. 
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true); - + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; } @@ -142,6 +142,17 @@ static bool isObjectSize(const Value *V, uint64_t Size, return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; } +/// isIdentifiedFunctionLocal - Return true if V is unambiguously identified +/// at the function level. Different IdentifiedFunctionLocals can't alias. +/// Further, an IdentifiedFunctionLocal cannot alias with any function +/// arguments other than itself, which is not necessarily true for +/// IdentifiedObjects. +static bool isIdentifiedFunctionLocal(const Value *V) +{ + return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V); +} + + //===----------------------------------------------------------------------===// // GetElementPtr Instruction Decomposition and Analysis //===----------------------------------------------------------------------===// @@ -152,7 +163,7 @@ namespace { EK_SignExt, EK_ZeroExt }; - + struct VariableGEPIndex { const Value *V; ExtensionKind Extension; @@ -189,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, Offset = 0; return V; } - + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { switch (BOp->getOpcode()) { @@ -220,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, } } } - + // Since GEP indices are sign extended anyway, we don't care about the high // bits of a sign or zero extended value - just scales and offsets. The // extensions have to be consistent though. @@ -237,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, TD, Depth+1); Scale = Scale.zext(OldWidth); Offset = Offset.zext(OldWidth); - + return Result; } - + Scale = 1; Offset = 0; return V; @@ -265,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, const DataLayout *TD) { // Limit recursion depth to limit compile time in crazy cases. unsigned MaxLookup = 6; - + BaseOffs = 0; do { // See if this is a bitcast or GEP. @@ -280,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, } return V; } - + if (Op->getOpcode() == Instruction::BitCast) { V = Op->getOperand(0); continue; @@ -297,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, V = Simplified; continue; } - + return V; } - + // Don't attempt to analyze GEPs over unsized objects. - if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) ->getElementType()->isSized()) + if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized()) return V; - + // If we are lacking DataLayout information, we can't compute the offsets of // elements computed by GEPs. However, we can handle bitcast equivalent // GEPs. @@ -315,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, V = GEPOp->getOperand(0); continue; } - + + unsigned AS = GEPOp->getPointerAddressSpace(); // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. gep_type_iterator GTI = gep_type_begin(GEPOp); for (User::const_op_iterator I = GEPOp->op_begin()+1, E = GEPOp->op_end(); I != E; ++I) { const Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); if (FieldNo == 0) continue; - + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); continue; } - + // For an array/pointer, add the element offset, explicitly scaled. 
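// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The decomposition above rewrites
// each GEP index into the linear form C1*V + C2 before scaling it by the
// element size. A minimal standalone illustration of that step over a toy
// expression tree instead of LLVM IR (Expr and decompose are hypothetical
// names invented for this sketch):
#include <cstdint>

struct Expr {
  enum Kind { Leaf, Add, Mul, Shl } kind;
  int64_t Imm;              // constant operand for Add/Mul/Shl
  const Expr *Sub;          // sub-expression; null for a Leaf
};

// Decompose E into Scale*V + Offset and return the leaf V.
static const Expr *decompose(const Expr *E, int64_t &Scale, int64_t &Offset) {
  switch (E->kind) {
  case Expr::Leaf:
    Scale = 1; Offset = 0;
    return E;
  case Expr::Add: {           // (S*V + O) + C  ==  S*V + (O + C)
    const Expr *V = decompose(E->Sub, Scale, Offset);
    Offset += E->Imm;
    return V;
  }
  case Expr::Mul: {           // (S*V + O) * C  ==  (S*C)*V + O*C
    const Expr *V = decompose(E->Sub, Scale, Offset);
    Scale *= E->Imm; Offset *= E->Imm;
    return V;
  }
  case Expr::Shl: {           // (S*V + O) << C behaves like a multiply by 2^C
    const Expr *V = decompose(E->Sub, Scale, Offset);
    Scale <<= E->Imm; Offset <<= E->Imm;
    return V;
  }
  }
  return E;
}
// For an index computed as x*4 + 2 this yields Scale = 4, Offset = 2, V = x,
// i.e. the (C1*V + C2) form that GetLinearExpression feeds into BaseOffs and
// VarIndices.
// ---------------------------------------------------------------------------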
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { if (CIdx->isZero()) continue; BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); continue; } - + uint64_t Scale = TD->getTypeAllocSize(*GTI); ExtensionKind Extension = EK_NotExtended; - + // If the integer type is smaller than the pointer size, it is implicitly // sign extended to pointer size. - unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth(); - if (TD->getPointerSizeInBits() > Width) + unsigned Width = Index->getType()->getIntegerBitWidth(); + if (TD->getPointerSizeInBits(AS) > Width) Extension = EK_SignExt; - + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. APInt IndexScale(Width, 0), IndexOffset(Width, 0); Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, *TD, 0); - + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. BaseOffs += IndexOffset.getSExtValue()*Scale; Scale *= IndexScale.getSExtValue(); - - + // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: // A[x][x] -> x*16 + x*4 -> x*20 @@ -370,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, break; } } - + // Make sure that we have a scale that makes sense for this target's // pointer size. - if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { + if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) { Scale <<= ShiftBits; Scale = (int64_t)Scale >> ShiftBits; } - + if (Scale) { VariableGEPIndex Entry = {Index, Extension, static_cast<int64_t>(Scale)}; VarIndices.push_back(Entry); } } - + // Analyze the base pointer next. V = GEPOp->getOperand(0); } while (--MaxLookup); - + // If the chain of expressions is too deep, just return early. return V; } @@ -396,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, /// GetIndexDifference - Dest and Src are the variable indices from two /// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base /// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic -/// difference between the two pointers. +/// difference between the two pointers. static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, const SmallVectorImpl<VariableGEPIndex> &Src) { if (Src.empty()) return; @@ -405,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, const Value *V = Src[i].V; ExtensionKind Extension = Src[i].Extension; int64_t Scale = Src[i].Scale; - + // Find V in Dest. This is N^2, but pointer indices almost never have more // than a few variable indexes. for (unsigned j = 0, e = Dest.size(); j != e; ++j) { if (Dest[j].V != V || Dest[j].Extension != Extension) continue; - + // If we found it, subtract off Scale V's from the entry in Dest. If it // goes to zero, remove the entry. if (Dest[j].Scale != Scale) @@ -420,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, Scale = 0; break; } - + // If we didn't consume this entry, add it to the end of the Dest list. if (Scale) { VariableGEPIndex Entry = { V, Extension, -Scale }; @@ -515,7 +525,7 @@ namespace { return (AliasAnalysis*)this; return this; } - + private: // AliasCache - Track alias queries to guard against recursion. 
typedef std::pair<Location, Location> LocPair; @@ -685,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, "AliasAnalysis query involving multiple functions!"); const Value *Object = GetUnderlyingObject(Loc.Ptr, TD); - + // If this is a tail call and Loc.Ptr points to a stack location, we know that // the tail call cannot access or modify the local stack. // We cannot exclude byval arguments here; these belong to the caller of @@ -695,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) if (CI->isTailCall()) return NoModRef; - + // If the pointer is to a locally allocated object that does not escape, // then the call can not mod/ref the pointer unless the call takes the pointer // as an argument, and itself doesn't capture it. @@ -711,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (!(*CI)->getType()->isPointerTy() || (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) continue; - + // If this is a no-capture pointer argument, see if we can tell that it // is impossible to alias the pointer we're checking. If not, we have to // assume that the call could touch the pointer, even though it doesn't @@ -721,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, break; } } - + if (!PassedAsArg) return NoModRef; } @@ -810,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, } // We can bound the aliasing properties of memset_pattern16 just as we can - // for memcpy/memset. This is particularly important because the + // for memcpy/memset. This is particularly important because the // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16 // whenever possible. else if (TLI.has(LibFunc::memset_pattern16) && @@ -846,8 +856,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } -static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1, - SmallVector<VariableGEPIndex, 4> &Indices2) { +static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1, + SmallVectorImpl<VariableGEPIndex> &Indices2) { unsigned Size1 = Indices1.size(); unsigned Size2 = Indices2.size(); @@ -914,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, GEP1VariableIndices.clear(); } } - + // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. if (BaseAlias != MustAlias) return BaseAlias; - + // Otherwise, we have a MustAlias. Since the base pointers alias each other // exactly, see if the computed offset from the common pointer tells us // about the relation of the resulting pointer. const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); - + int64_t GEP2BaseOffset; SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); - + // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { @@ -937,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } - + // Subtract the GEP2 pointer from the GEP1 pointer to find out their // symbolic difference. 
GEP1BaseOffset -= GEP2BaseOffset; GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices); - + } else { // Check to see if these two pointers are related by the getelementptr // instruction. If one pointer is a GEP with a non-zero index of the other // pointer, we know they cannot alias. @@ -964,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); - + // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1) { assert(TD == 0 && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } } - + // In the two-GEP case, if there is no difference in the offsets of the // computed pointers, the resultant pointers are a must alias. This // happens when we have two lexically identical GEPs (for example). @@ -1205,17 +1215,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1))) return NoAlias; - // Arguments can't alias with local allocations or noalias calls - // in the same function. - if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) || - (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))) + // Function arguments can't alias with things that are known to be + // unambiguously identified at the function level. + if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) || + (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1))) return NoAlias; // Most objects can't alias null. if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) || (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2))) return NoAlias; - + // If one pointer is the result of a call/invoke or load and the other is a // non-escaping local object within the same function, then we know the // object couldn't escape to a point where the call could return it. @@ -1237,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) || (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI))) return NoAlias; - + // Check the cache before climbing up use-def chains. This also terminates // otherwise infinitely recursive queries. 
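// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The "symbolic difference" above
// subtracts GEP2's variable indices from GEP1's; when the lists fully cancel
// and the constant offsets are equal, the pointers must alias. A simplified
// standalone version of that subtraction (VarIndex and subtractIndices are
// hypothetical names for this sketch):
#include <cstdint>
#include <vector>

struct VarIndex { const void *V; int64_t Scale; };

static void subtractIndices(std::vector<VarIndex> &Dest,
                            const std::vector<VarIndex> &Src) {
  for (const VarIndex &S : Src) {
    bool Found = false;
    for (auto I = Dest.begin(); I != Dest.end(); ++I) {
      if (I->V != S.V)
        continue;
      I->Scale -= S.Scale;        // matching variable: subtract the scales
      if (I->Scale == 0)
        Dest.erase(I);            // fully cancelled, drop the term
      Found = true;
      break;
    }
    if (!Found)
      Dest.push_back({S.V, -S.Scale}); // unmatched term carries over, negated
  }
}
// Two lexically identical GEPs produce identical index lists, so Dest ends up
// empty and only the (equal) constant offsets remain -- the MustAlias case
// handled below.
// ---------------------------------------------------------------------------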
LocPair Locs(Location(V1, V1Size, V1TBAAInfo), diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 100e5c8..62f3ab1 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======// +//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===// // // The LLVM Compiler Infrastructure // @@ -17,14 +17,97 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" using namespace llvm; -INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", - true, true) +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt<GVDAGType> +ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how block " + "frequencies propagation through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits<BlockFrequencyInfo *> { + typedef const BasicBlock NodeType; + typedef succ_const_iterator ChildIteratorType; + typedef Function::const_iterator nodes_iterator; + + static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + static ChildIteratorType child_begin(const NodeType *N) { + return succ_begin(N); + } + static ChildIteratorType child_end(const NodeType *N) { + return succ_end(N); + } + static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + static nodes_iterator nodes_end(const BlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const BlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const BasicBlock *Node, + const BlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->getBlockFreq(Node).print(OS); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + + return Result; + } +}; + +} // end namespace llvm +#endif + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", + "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) -INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", - true, true) +INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", + "Block Frequency Analysis", true, true) char BlockFrequencyInfo::ID = 0; @@ -46,6 +129,10 @@ void 
BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { bool BlockFrequencyInfo::runOnFunction(Function &F) { BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); BFI->doFunction(&F, &BPI); +#ifndef NDEBUG + if (ViewBlockFreqPropagationDAG != GVDT_None) + view(); +#endif return false; } @@ -53,11 +140,22 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { if (BFI) BFI->print(O); } -/// getblockFreq - Return block frequency. Return 0 if we don't have the -/// information. Please note that initial frequency is equal to 1024. It means -/// that we should not rely on the value itself, but only on the comparison to -/// the other block frequencies. We do this to avoid using of floating points. -/// BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI->getBlockFreq(BB); } + +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void BlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs"); +#else + errs() << "BlockFrequencyInfo::view is only available in debug builds on " + "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +const Function *BlockFrequencyInfo::getFunction() const { + return BFI->Fn; +} diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 6c58856..86560ca 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1; /// easily subsume it. static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; +/// \brief Weight for a branch taken going into a cold block. +/// +/// This is the weight for a branch taken toward a block marked +/// cold. A block is marked cold if it's postdominated by a +/// block containing a call to a cold function. Cold functions +/// are those marked with attribute 'cold'. +static const uint32_t CC_TAKEN_WEIGHT = 4; + +/// \brief Weight for a branch not-taken into a cold block. +/// +/// This is the weight for a branch not taken toward a block marked +/// cold. +static const uint32_t CC_NONTAKEN_WEIGHT = 64; + static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; @@ -137,8 +151,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { uint32_t UnreachableWeight = std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); - for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); + for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, UnreachableWeight); @@ -147,8 +161,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { uint32_t ReachableWeight = std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), NORMAL_WEIGHT); - for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); + for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); I != E; ++I) setEdgeWeight(BB, *I, ReachableWeight); @@ -193,6 +207,67 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { return true; } +/// \brief Calculate edge weights for edges leading to cold blocks. 
+/// +/// A cold block is one post-dominated by a block with a call to a +/// cold function. Those edges are unlikely to be taken, so we give +/// them relatively low weight. +/// +/// Return true if we could compute the weights for cold edges. +/// Return false otherwise. +bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return false; + + // Determine which successors are post-dominated by a cold block. + SmallVector<unsigned, 4> ColdEdges; + SmallVector<unsigned, 4> NormalEdges; + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + if (PostDominatedByColdCall.count(*I)) + ColdEdges.push_back(I.getSuccessorIndex()); + else + NormalEdges.push_back(I.getSuccessorIndex()); + + // If all successors are in the set of blocks post-dominated by cold calls, + // this block is in the set post-dominated by cold calls. + if (ColdEdges.size() == TI->getNumSuccessors()) + PostDominatedByColdCall.insert(BB); + else { + // Otherwise, if the block itself contains a cold function, add it to the + // set of blocks postdominated by a cold call. + assert(!PostDominatedByColdCall.count(BB)); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) + if (CI->hasFnAttr(Attribute::Cold)) { + PostDominatedByColdCall.insert(BB); + break; + } + } + + // Skip probabilities if this block has a single successor. + if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) + return false; + + uint32_t ColdWeight = + std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(), + E = ColdEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ColdWeight); + + if (NormalEdges.empty()) + return true; + uint32_t NormalWeight = std::max( + CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); + for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(), + E = NormalEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, NormalWeight); + + return true; +} + // Calculate Edge Weights using "Pointer Heuristics". Predict that a comparison // between two pointers or between a pointer and NULL will fail. bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { @@ -251,7 +326,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(), + for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { setEdgeWeight(BB, *EI, backWeight); } @@ -262,7 +337,7 @@ ... if (inWeight < NORMAL_WEIGHT) inWeight = NORMAL_WEIGHT; - for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(), + for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { setEdgeWeight(BB, *EI, inWeight); } @@ -273,7 +348,7 @@ ... if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(), + for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { setEdgeWeight(BB, *EI, exitWeight); } @@ -323,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { // InstCombine canonicalizes X <= 0 into X < 1. 
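// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The weights set by
// calcColdCallHeuristics above only matter relative to one another: an edge's
// probability is its weight divided by the sum of its block's edge weights.
// A quick check of what CC_TAKEN_WEIGHT = 4 vs. CC_NONTAKEN_WEIGHT = 64
// implies for a block with one cold and one normal successor:
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t ColdWeight = 4;     // CC_TAKEN_WEIGHT
  const uint32_t NormalWeight = 64;  // CC_NONTAKEN_WEIGHT
  double ColdProb = double(ColdWeight) / (ColdWeight + NormalWeight);
  std::printf("cold edge taken with probability %.1f%%\n", ColdProb * 100.0);
  return 0;                          // prints ~5.9%
}
// ---------------------------------------------------------------------------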
// X <= 0 -> Unlikely isProb = false; - } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) { - // InstCombine canonicalizes X >= 0 into X > -1. - // X >= 0 -> Likely - isProb = true; + } else if (CV->isAllOnesValue()) { + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + // X == -1 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_NE: + // X != -1 -> Likely + isProb = true; + break; + case CmpInst::ICMP_SGT: + // InstCombine canonicalizes X >= 0 into X > -1. + // X >= 0 -> Likely + isProb = true; + break; + default: + return false; + } } else { return false; } @@ -397,6 +486,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { LastF = &F; // Store the last function we ran on for printing. LI = &getAnalysis<LoopInfo>(); assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. @@ -408,6 +498,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { continue; if (calcMetadataWeights(*I)) continue; + if (calcColdCallHeuristics(*I)) + continue; if (calcLoopBranchHeuristics(*I)) continue; if (calcPointerHeuristics(*I)) @@ -420,6 +512,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { } PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); return false; } diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp new file mode 100644 index 0000000..c3f32d3 --- /dev/null +++ b/contrib/llvm/lib/Analysis/CFG.cpp @@ -0,0 +1,245 @@ +//===-- CFG.cpp - BasicBlock analysis --------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions performs analyses on basic blocks, and instructions +// contained within basic blocks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFG.h" + +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" + +using namespace llvm; + +/// FindFunctionBackedges - Analyze the specified function to find all of the +/// loop backedges in the function and return them. This is a relatively cheap +/// (compared to computing dominators and loop info) analysis. +/// +/// The output is added to Result, as pairs of <from,to> edge info. +void llvm::FindFunctionBackedges(const Function &F, + SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) { + const BasicBlock *BB = &F.getEntryBlock(); + if (succ_begin(BB) == succ_end(BB)) + return; + + SmallPtrSet<const BasicBlock*, 8> Visited; + SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack; + SmallPtrSet<const BasicBlock*, 8> InStack; + + Visited.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + InStack.insert(BB); + do { + std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back(); + const BasicBlock *ParentBB = Top.first; + succ_const_iterator &I = Top.second; + + bool FoundNew = false; + while (I != succ_end(ParentBB)) { + BB = *I++; + if (Visited.insert(BB)) { + FoundNew = true; + break; + } + // Successor is in VisitStack, it's a back edge. 
+ if (InStack.count(BB)) + Result.push_back(std::make_pair(ParentBB, BB)); + } + + if (FoundNew) { + // Go down one level if there is an unvisited successor. + InStack.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + } else { + // Go up one level. + InStack.erase(VisitStack.pop_back_val().first); + } + } while (!VisitStack.empty()); +} + +/// GetSuccessorNumber - Search for the specified successor of basic block BB +/// and return its position in the terminator instruction's list of +/// successors. It is an error to call this with a block that is not a +/// successor. +unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *Term = BB->getTerminator(); +#ifndef NDEBUG + unsigned e = Term->getNumSuccessors(); +#endif + for (unsigned i = 0; ; ++i) { + assert(i != e && "Didn't find edge?"); + if (Term->getSuccessor(i) == Succ) + return i; + } +} + +/// isCriticalEdge - Return true if the specified edge is a critical edge. +/// Critical edges are edges from a block with multiple successors to a block +/// with multiple predecessors. +bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, + bool AllowIdenticalEdges) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *FirstPred = *I; + ++I; // Skip one edge due to the incoming arc from TI. + if (!AllowIdenticalEdges) + return I != E; + + // If AllowIdenticalEdges is true, then we allow this edge to be considered + // non-critical iff all preds come from TI's block. + while (I != E) { + const BasicBlock *P = *I; + if (P != FirstPred) + return true; + // Note: leave this as is until no one ever compiles with either gcc 4.0.1 + // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 + E = pred_end(P); + ++I; + } + return false; +} + +// LoopInfo contains a mapping from basic block to the innermost loop. Find +// the outermost loop in the loop nest that contains BB. +static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) { + const Loop *L = LI->getLoopFor(BB); + if (L) { + while (const Loop *Parent = L->getParentLoop()) + L = Parent; + } + return L; +} + +// True if there is a loop which contains both BB1 and BB2. +static bool loopContainsBoth(const LoopInfo *LI, + const BasicBlock *BB1, const BasicBlock *BB2) { + const Loop *L1 = getOutermostLoop(LI, BB1); + const Loop *L2 = getOutermostLoop(LI, BB2); + return L1 != NULL && L1 == L2; +} + +static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist, + BasicBlock *StopBB, + const DominatorTree *DT, + const LoopInfo *LI) { + // When the stop block is unreachable, it's dominated from everywhere, + // regardless of whether there's a path between the two blocks. + if (DT && !DT->isReachableFromEntry(StopBB)) + DT = 0; + + // Limit the number of blocks we visit. The goal is to avoid run-away compile + // times on large CFGs without hampering sensible code. Arbitrarily chosen. 
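// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The search below is a plain
// depth-first worklist walk with a visit budget: running out of budget answers
// "potentially reachable" rather than risking runaway compile time. The same
// idea on a bare adjacency list (potentiallyReachable is a hypothetical name):
#include <set>
#include <vector>

static bool potentiallyReachable(const std::vector<std::vector<int>> &Succs,
                                 int From, int To, unsigned Limit = 32) {
  std::vector<int> Worklist{From};
  std::set<int> Visited;
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;                  // already explored this block
    if (BB == To)
      return true;               // found a path
    if (--Limit == 0)
      return true;               // budget exhausted: conservatively "maybe"
    for (int S : Succs[BB])
      Worklist.push_back(S);
  }
  return false;                  // all paths exhausted: definitely unreachable
}
// The real implementation additionally short-circuits through the dominator
// tree and loop info: if BB dominates the target, or both blocks share a
// loop, a path must exist without walking the graph.
// ---------------------------------------------------------------------------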
+ unsigned Limit = 32; + SmallSet<const BasicBlock*, 64> Visited; + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB)) + continue; + if (BB == StopBB) + return true; + if (DT && DT->dominates(BB, StopBB)) + return true; + if (LI && loopContainsBoth(LI, BB, StopBB)) + return true; + + if (!--Limit) { + // We haven't been able to prove it one way or the other. Conservatively + // answer true -- that there is potentially a path. + return true; + } + + if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) { + // All blocks in a single loop are reachable from all other blocks. From + // any of these blocks, we can skip directly to the exits of the loop, + // ignoring any other blocks inside the loop body. + Outer->getExitBlocks(Worklist); + } else { + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + Worklist.push_back(*I); + } + } while (!Worklist.empty()); + + // We have exhausted all possible paths and are certain that 'To' can not be + // reached from 'From'. + return false; +} + +bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B, + const DominatorTree *DT, const LoopInfo *LI) { + assert(A->getParent() == B->getParent() && + "This analysis is function-local!"); + + SmallVector<BasicBlock*, 32> Worklist; + Worklist.push_back(const_cast<BasicBlock*>(A)); + + return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B), + DT, LI); +} + +bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, + const DominatorTree *DT, const LoopInfo *LI) { + assert(A->getParent()->getParent() == B->getParent()->getParent() && + "This analysis is function-local!"); + + SmallVector<BasicBlock*, 32> Worklist; + + if (A->getParent() == B->getParent()) { + // The same block case is special because it's the only time we're looking + // within a single block to see which instruction comes first. Once we + // start looking at multiple blocks, the first instruction of the block is + // reachable, so we only need to determine reachability between whole + // blocks. + BasicBlock *BB = const_cast<BasicBlock *>(A->getParent()); + + // If the block is in a loop then we can reach any instruction in the block + // from any other instruction in the block by going around a backedge. + if (LI && LI->getLoopFor(BB) != 0) + return true; + + // Linear scan, start at 'A', see whether we hit 'B' or the end first. + for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { + if (&*I == B) + return true; + } + + // Can't be in a loop if it's the entry block -- the entry block may not + // have predecessors. + if (BB == &BB->getParent()->getEntryBlock()) + return false; + + // Otherwise, continue doing the normal per-BB CFG walk. + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) + Worklist.push_back(*I); + + if (Worklist.empty()) { + // We've proven that there's no path! 
+ return false; + } + } else { + Worklist.push_back(const_cast<BasicBlock*>(A->getParent())); + } + + if (A->getParent() == &A->getParent()->getParent()->getEntryBlock()) + return true; + if (B->getParent() == &A->getParent()->getParent()->getEntryBlock()) + return false; + + return isPotentiallyReachableInner(Worklist, + const_cast<BasicBlock*>(B->getParent()), + DT, LI); +} diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index 2118917..79fab1b 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -164,10 +164,10 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { // Don't count comparisons of a no-alias return value against null as // captures. This allows us to ignore comparisons of malloc results // with null, for example. - if (isNoAliasCall(V->stripPointerCasts())) - if (ConstantPointerNull *CPN = - dyn_cast<ConstantPointerNull>(I->getOperand(1))) - if (CPN->getType()->getAddressSpace() == 0) + if (ConstantPointerNull *CPN = + dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + if (isNoAliasCall(V->stripPointerCasts())) break; // Otherwise, be conservative. There are crazy ways to capture pointers // using comparisons. diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index bc0dffc..3d32232 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { - Offset.clearAllBits(); + unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType()); + Offset = APInt(BitWidth, 0); return true; } @@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) { - // If the base isn't a global+constant, we aren't either. - if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) - return false; + GEPOperator *GEP = dyn_cast<GEPOperator>(CE); + if (!GEP) + return false; - // Otherwise, add any offset that our operands provide. - return GEP->accumulateConstantOffset(TD, Offset); - } + unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType()); + APInt TmpOffset(BitWidth, 0); - return false; + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. + if (!GEP->accumulateConstantOffset(TD, TmpOffset)) + return false; + + Offset = TmpOffset; + return true; } /// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the @@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, // If we read all of the bytes we needed from this element we're done. uint64_t NextEltOffset = SL->getElementOffset(Index); - if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset) return true; // Move to the next element of the struct. 
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset; - BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + CurPtr += NextEltOffset - CurEltOffset - ByteOffset; + BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset; ByteOffset = 0; CurEltOffset = NextEltOffset; } @@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (isa<ConstantArray>(C) || isa<ConstantVector>(C) || isa<ConstantDataSequential>(C)) { - Type *EltTy = cast<SequentialType>(C->getType())->getElementType(); + Type *EltTy = C->getType()->getSequentialElementType(); uint64_t EltSize = TD.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; @@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ArrayType *AT = dyn_cast<ArrayType>(C->getType())) NumElts = AT->getNumElements(); else - NumElts = cast<VectorType>(C->getType())->getNumElements(); + NumElts = C->getType()->getVectorNumElements(); for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, @@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) { return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, TD); + } } // Otherwise, unknown initializer type. @@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, const DataLayout &TD) { - Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + PointerType *PTy = cast<PointerType>(C->getType()); + Type *LoadTy = PTy->getElementType(); IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); // If this isn't an integer load we can't fold it directly. if (!IntType) { + unsigned AS = PTy->getAddressSpace(); + // If this is a float/double load, we can try folding it as an int32/64 load // and then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. 
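// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The trick used here: fold the load
// as an integer of the same width, then bitcast the bits to the float type.
// In plain C++ the equivalent of that final step looks like this
// (foldFloatFromBytes is a hypothetical name; little-endian order assumed):
#include <cstdint>
#include <cstring>

static float foldFloatFromBytes(const uint8_t Bytes[4]) {
  uint32_t Bits = 0;
  for (unsigned i = 0; i != 4; ++i)
    Bits |= uint32_t(Bytes[i]) << (8 * i); // reassemble the i32 (little-endian)
  float F;
  std::memcpy(&F, &Bits, sizeof F);        // reinterpret the bits as a float
  return F;
}
// The code below mirrors this by mapping half/float/double loads to
// i16/i32/i64 loads in the same address space and bitcasting the folded
// integer result.
// ---------------------------------------------------------------------------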
Type *MapTy; if (LoadTy->isHalfTy()) - MapTy = Type::getInt16PtrTy(C->getContext()); + MapTy = Type::getInt16PtrTy(C->getContext(), AS); else if (LoadTy->isFloatTy()) - MapTy = Type::getInt32PtrTy(C->getContext()); + MapTy = Type::getInt32PtrTy(C->getContext(), AS); else if (LoadTy->isDoubleTy()) - MapTy = Type::getInt64PtrTy(C->getContext()); + MapTy = Type::getInt64PtrTy(C->getContext(), AS); else if (LoadTy->isVectorTy()) { - MapTy = IntegerType::get(C->getContext(), - TD.getTypeAllocSizeInBits(LoadTy)); - MapTy = PointerType::getUnqual(MapTy); + MapTy = PointerType::getIntNPtrTy(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy), + AS); } else return 0; @@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; - if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + if (BytesLoaded > 32 || BytesLoaded == 0) + return 0; GlobalValue *GVal; - APInt Offset(TD.getPointerSizeInBits(), 0); + APInt Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -422,7 +435,8 @@ // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset.isNegative()) return 0; + if (Offset.isNegative()) + return 0; // If we're not accessing anything in this constant, the result is undefined. if (Offset.getZExtValue() >= @@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, ResultVal = RawBytes[BytesLoaded - 1]; for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + ResultVal |= RawBytes[BytesLoaded - 1 - i]; } } else { ResultVal = RawBytes[0]; @@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); - if (!CE) return 0; + if (!CE) + return 0; if (CE->getOpcode() == Instruction::GetElementPtr) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) return V; + } + } } // Instead of loading constant c string, use corresponding integer value @@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // constant. This happens frequently when iterating over a global array. if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - unsigned PtrSize = DL->getPointerSizeInBits(); - unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); - APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); + APInt Offs1, Offs2; if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. // PtrToInt may change the bitwidth so we have to convert to the right size // first. 
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { - if (!TD) return 0; - Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + if (!TD) + return 0; + + Type *IntPtrTy = TD->getIntPtrType(ResultTy); bool Any = false; SmallVector<Constant*, 32> NewIdxs; for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || - !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(), - Ops.slice(1, i-1)))) && + !isa<StructType>(GetElementPtrInst::getIndexedType( + Ops[0]->getType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, } else NewIdxs.push_back(Ops[i]); } - if (!Any) return 0; - Constant *C = - ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (!Any) + return 0; + + Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; + } + return C; } @@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { NewPtrTy = NewPtrTy->getElementType()->getPointerTo( OldPtrTy->getAddressSpace()); - Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy); + Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy); } return Ptr; } @@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; - if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() || + if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) return 0; - Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); + Type *ResultElementTy = ResultTy->getPointerElementType(); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && - cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) { + if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); assert((CE == 0 || CE->getType() == IntPtrTy) && "CastGEPIndices didn't canonicalize index types!"); @@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // If this is a GEP of a GEP, fold it all into a single GEP. while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end()); + SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end()); // Do not try to incorporate the sub-GEP if some index is not a number. bool AllConstantInt = true; @@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. 
APInt BasePtr(BitWidth, 0); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) - if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { + if (CE->getOpcode() == Instruction::IntToPtr) { if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) BasePtr = Base->getValue().zextOrTrunc(BitWidth); + } + } + if (Ptr->isNullValue() || BasePtr != 0) { - Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); + Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); } @@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Also, this helps GlobalOpt do SROA on GlobalVariables. Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); - SmallVector<Constant*, 32> NewIdxs; + SmallVector<Constant *, 32> NewIdxs; + do { if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { if (ATy->isPointerTy()) { @@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, // We've reached some non-indexable type. break; } - } while (Ty != cast<PointerType>(ResultTy)->getElementType()); + } while (Ty != ResultElementTy); // If we haven't used up the entire offset by descending the static // type, then the offset is pointing into the middle of an indivisible @@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return 0; // Create a GEP. - Constant *C = - ConstantExpr::getGetElementPtr(Ptr, NewIdxs); - assert(cast<PointerType>(C->getType())->getElementType() == Ty && + Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); // If we ended up indexing a member with a type that doesn't match // the type of what the original indices indexed, add a cast. - if (Ty != cast<PointerType>(ResultTy)->getElementType()) + if (Ty != ResultElementTy) C = FoldBitCast(C, ResultTy, *TD); return C; @@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, if (const LoadInst *LI = dyn_cast<LoadInst>(I)) return ConstantFoldLoadInst(LI, TD); - if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) + if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) { return ConstantExpr::getInsertValue( cast<Constant>(IVI->getAggregateOperand()), cast<Constant>(IVI->getInsertedValueOperand()), IVI->getIndices()); + } - if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) { return ConstantExpr::getExtractValue( cast<Constant>(EVI->getAggregateOperand()), EVI->getIndices()); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } @@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, const TargetLibraryInfo *TLI) { // Handle easy binops first. 
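// ---------------------------------------------------------------------------
// [Editor's sketch, not part of the patch] The binop case defers to
// SymbolicallyEvaluateBinop (above), whose key win is folding the difference
// of two offsets from the same global: (&GV + C1) - (&GV + C2) == C1 - C2,
// with no overflow because both point into one object. In miniature
// (GlobalOffset and foldPtrDiff are hypothetical names for this sketch):
#include <cstdint>

struct GlobalOffset { const void *GV; int64_t Off; }; // base global + offset

// Returns true and sets Result when both operands are based on the same
// global, mirroring the IsConstantOffsetFromGlobal + GV1 == GV2 check.
static bool foldPtrDiff(const GlobalOffset &A, const GlobalOffset &B,
                        int64_t &Result) {
  if (A.GV != B.GV)
    return false;          // different objects: not foldable this way
  Result = A.Off - B.Off;  // (&GV+C1) - (&GV+C2) -> C1 - C2
  return true;
}
// ---------------------------------------------------------------------------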
if (Instruction::isBinaryOp(Opcode)) { - if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) + if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) { if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) return C; + } return ConstantExpr::get(Opcode, Ops[0], Ops[1]); } @@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, if (TD && CE->getOpcode() == Instruction::IntToPtr) { Constant *Input = CE->getOperand(0); unsigned InWidth = Input->getType()->getScalarSizeInBits(); - if (TD->getPointerSizeInBits() < InWidth) { + unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType()); + if (PtrWidth < InWidth) { Constant *Mask = - ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, - TD->getPointerSizeInBits())); + ConstantInt::get(CE->getContext(), + APInt::getLowBitsSet(InWidth, PtrWidth)); Input = ConstantExpr::getAnd(Input, Mask); } // Do a zext or trunc to get to the dest size. @@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::IntToPtr: // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if - // the int size is >= the ptr size. This requires knowing the width of a - // pointer, so it can't be done in ConstantExpr::getCast. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) - if (TD && - TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && - CE->getOpcode() == Instruction::PtrToInt) - return FoldBitCast(CE->getOperand(0), DestTy, *TD); + // the int size is >= the ptr size and the address spaces are the same. + // This requires knowing the width of a pointer, so it can't be done in + // ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::PtrToInt) { + Constant *SrcPtr = CE->getOperand(0); + unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType()); + unsigned MidIntSize = CE->getType()->getScalarSizeInBits(); + + if (MidIntSize >= SrcPtrSize) { + unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace(); + if (SrcAS == DestTy->getPointerAddressSpace()) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + } + } + } return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::Trunc: @@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::AddrSpaceCast: return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::BitCast: if (TD) @@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { if (TD && Ops1->isNullValue()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. 
- if (CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy) { - Constant *C = CE0->getOperand(0); - Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + } } } if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); - if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. - if ((CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy && - CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) - return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD, TLI); + if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { + return ConstantFoldCompareInstOperands(Predicate, + CE0->getOperand(0), + CE1->getOperand(0), + TD, + TLI); + } + } } } @@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, // addressing. for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { C = C->getAggregateElement(CE->getOperand(i)); - if (C == 0) return 0; + if (C == 0) + return 0; } return C; } @@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // addressing. for (unsigned i = 0, e = Indices.size(); i != e; ++i) { C = C->getAggregateElement(Indices[i]); - if (C == 0) return 0; + if (C == 0) + return 0; } return C; } @@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, /// canConstantFoldCallTo - Return true if it's even possible to fold a call to /// the specified function. -bool -llvm::canConstantFoldCallTo(const Function *F) { +bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::log: @@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) { case 0: break; } - if (!F->hasName()) return false; + if (!F->hasName()) + return false; StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to @@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), static Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64 bits.
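(A side note on the PtrToInt guard above: the rewrite to a pointer compare is gated on the integer type being exactly the pointer-sized integer for the source pointer. A minimal sketch of that condition, assuming a valid DataLayout; the helper name is hypothetical.)

#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

// Rewriting icmp (ptrtoint %p), 0 as icmp %p, null is only sound when no
// implicit truncation or extension of the pointer value is involved.
static bool canFoldPtrToIntCompare(llvm::ConstantExpr *CE,
                                   const llvm::DataLayout *TD) {
  if (CE->getOpcode() != llvm::Instruction::PtrToInt)
    return false;
  llvm::Type *IntPtrTy = TD->getIntPtrType(CE->getOperand(0)->getType());
  return CE->getType() == IntPtrTy;
}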
- unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth(); + unsigned ResultWidth = Ty->getIntegerBitWidth(); assert(ResultWidth <= 64 && "Can only constant fold conversions to 64 and 32 bit ints"); @@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val, Constant * llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, const TargetLibraryInfo *TLI) { - if (!F->hasName()) return 0; + if (!F->hasName()) + return 0; StringRef Name = F->getName(); Type *Ty = F->getReturnType(); diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 98a7780..f943258 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -19,6 +19,7 @@ #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" @@ -26,10 +27,15 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); + namespace { class CostModelAnalysis : public FunctionPass { @@ -81,7 +87,7 @@ CostModelAnalysis::runOnFunction(Function &F) { return false; } -static bool isReverseVectorMask(SmallVector<int, 16> &Mask) { +static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) return false; @@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { return OpInfo; } +static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) { + if (M1.size() != M2.size()) + return false; + + for (unsigned i = 0, e = M1.size(); i != e; ++i) + if (M1[i] != M2[i]) + return false; + + return true; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector<int, 16> ActualMask = SI->getShuffleMask(); + if (!matchMask(Mask, ActualMask)) + return false; + + return true; +} + +static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, + unsigned Level, unsigned NumLevels) { + // Match one level of pairwise operations. 
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (BinOp == 0) + return false; + + assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); + + unsigned Opcode = BinOp->getOpcode(); + Value *L = BinOp->getOperand(0); + Value *R = BinOp->getOperand(1); + + ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); + if (!LS && Level) + return false; + ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); + if (!RS && Level) + return false; + + // On level 0 we can omit one shufflevector instruction. + if (!Level && !RS && !LS) + return false; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; + Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; + Value *NextLevelOp = 0; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return false; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. + // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != R) + return false; + else if (NextLevelOpR && NextLevelOpR != L) + return false; + + NextLevelOp = NextLevelOpL ? R : L; + } else + return false; + + // Check that the next level's binary operation exists and matches the + // current one. + BinaryOperator *NextLevelBinOp = 0; + if (Level + 1 != NumLevels) { + if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) + return false; + else if (NextLevelBinOp->getOpcode() != Opcode) + return false; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, true, Level)) { + if (!matchPairwiseShuffleMask(RS, false, Level)) + return false; + } else if (matchPairwiseShuffleMask(RS, true, Level)) { + if (!matchPairwiseShuffleMask(LS, false, Level)) + return false; + } else + return false; + + if (++Level == NumLevels) + return true; + + // Match next level. + return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); +} + +static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree.
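(Sketch of the masks this bottom-up recursion expects; the helper below is hypothetical and only restates the mask construction in matchPairwiseShuffleMask.)

#include <vector>

// Level L expects the left/right shuffles to pick the even or odd lanes of
// the next level's result in the first 2^L positions, leaving the rest
// undef (-1): level 0 expects <0,u,u,...> / <1,u,u,...>, level 1 expects
// <0,2,u,...> / <1,3,u,...>, and so on.
static std::vector<int> expectedPairwiseMask(unsigned NumElems, bool IsLeft,
                                             unsigned Level) {
  std::vector<int> Mask(NumElems, -1);
  for (unsigned i = 0, e = 1u << Level, v = !IsLeft; i != e; ++i, v += 2)
    Mask[i] = (int)v;
  return Mask;
}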
+ // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) + return false; + + Opcode = RdxStart->getOpcode(); + Ty = VecTy; + + return true; +} + +static std::pair<Value *, ShuffleVectorInst *> +getShuffleAndOtherOprd(BinaryOperator *B) { + + Value *L = B->getOperand(0); + Value *R = B->getOperand(1); + ShuffleVectorInst *S = 0; + + if ((S = dyn_cast<ShuffleVectorInst>(L))) + return std::make_pair(R, S); + + S = dyn_cast<ShuffleVectorInst>(R); + return std::make_pair(L, S); +} + +static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + unsigned RdxOpcode = RdxStart->getOpcode(); + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffles and adds like the following, matching + // one fadd/shufflevector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Value *RdxOp = RdxStart; + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + BinaryOperator *BinOp; + if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) + return false; + if (BinOp->getOpcode() != RdxOpcode) + return false; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); + + // Check that the current reduction operation and the shuffle use the same value. + if (Shuffle == 0) + return false; + if (Shuffle->getOperand(0) != NextRdxOp) + return false; + + // Check that the shuffle masks match. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef.
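(For the mask check here, a hedged sketch of the expected shape; the helper is hypothetical. Walking the chain bottom-up from the extract, the shuffle at a step with MaskStart live lanes must move lanes MaskStart..2*MaskStart-1 into the low positions, the rest being undef: on a 4-wide vector, first <1,u,u,u>, then <2,3,u,u>.)

#include <vector>

static std::vector<int> expectedSplitMask(unsigned NumElems,
                                          unsigned MaskStart) {
  std::vector<int> Mask(NumElems, -1);
  for (unsigned j = 0; j != MaskStart; ++j)
    Mask[j] = (int)(MaskStart + j); // shift the upper half down
  return Mask;
}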
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + if (!matchMask(ShuffleMask, Mask)) + return false; + + RdxOp = NextRdxOp; + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RdxOpcode; + Ty = VecTy; + return true; +} + unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (!TTI) return -1; @@ -189,18 +449,29 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (a series of shufflevector and vector + // adds followed by an extractelement). + unsigned ReduxOpCode; + Type *ReduxType; + + if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, false); + else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, true); + return TTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { - const InsertElementInst * IE = cast<InsertElementInst>(I); - ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return TTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); - } + const InsertElementInst * IE = cast<InsertElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp new file mode 100644 index 0000000..3ed0609 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -0,0 +1,133 @@ +//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements an analysis pass that tries to delinearize all GEP +// instructions in all loops using the SCEV analysis functionality. This pass is +// only used for testing purposes: if your pass needs delinearization, please +// use the on-demand SCEVAddRecExpr::delinearize() function.
+// +//===----------------------------------------------------------------------===// + +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/IR/Type.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +class Delinearization : public FunctionPass { + Delinearization(const Delinearization &); // do not implement +protected: + Function *F; + LoopInfo *LI; + ScalarEvolution *SE; + +public: + static char ID; // Pass identification, replacement for typeid + + Delinearization() : FunctionPass(ID) { + initializeDelinearizationPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void print(raw_ostream &O, const Module *M = 0) const; +}; + +} // end anonymous namespace + +void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); +} + +bool Delinearization::runOnFunction(Function &F) { + this->F = &F; + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + return false; +} + +static Value *getPointerOperand(Instruction &Inst) { + if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) + return Load->getPointerOperand(); + else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) + return Store->getPointerOperand(); + else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst)) + return Gep->getPointerOperand(); + return NULL; +} + +void Delinearization::print(raw_ostream &O, const Module *) const { + O << "Delinearization on function " << F->getName() << ":\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &(*I); + + // Only analyze loads and stores. + if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) && + !isa<GetElementPtrInst>(Inst)) + continue; + + const BasicBlock *BB = Inst->getParent(); + // Delinearize the memory access as analyzed in all the surrounding loops. + // Do not analyze memory accesses outside loops. + for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); + + // Do not try to delinearize memory accesses that are not AddRecs. 
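(For orientation, the printing code below emits output of the following shape for a two-dimensional access such as A[i][j] in a hypothetical double A[n][m] nest; the concrete SCEV strings here are illustrative assumptions, not output from this patch:

AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
Base offset: %A
ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>]
)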
+ if (!AR) + break; + + O << "AddRec: " << *AR << "\n"; + + SmallVector<const SCEV *, 3> Subscripts, Sizes; + const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); + int Size = Subscripts.size(); + if (Res == AR || Size == 0) { + O << "failed to delinearize\n"; + continue; + } + O << "Base offset: " << *Res << "\n"; + O << "ArrayDecl[UnknownSize]"; + for (int i = 0; i < Size - 1; i++) + O << "[" << *Sizes[i] << "]"; + O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + + O << "ArrayRef"; + for (int i = 0; i < Size; i++) + O << "[" << *Subscripts[i] << "]"; + O << "\n"; + } + } +} + +char Delinearization::ID = 0; +static const char delinearization_name[] = "Delinearization"; +INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, + true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) + +FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index cbc71bd..3b3e2ef 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -24,11 +24,11 @@ // Both of these are conservative weaknesses; // that is, not a source of correctness problems. // -// The implementation depends on the GEP instruction to -// differentiate subscripts. Since Clang linearizes subscripts -// for most arrays, we give up some precision (though the existing MIV tests -// will help). We trust that the GEP instruction will eventually be extended. -// In the meantime, we should explore Maslov's ideas about delinearization. +// The implementation depends on the GEP instruction to differentiate +// subscripts. Since Clang linearizes some array subscripts, the dependence +// analysis uses SCEV->delinearize to recover the representation of multiple +// subscripts, and thus avoids the more expensive and less precise MIV tests. The +// delinearization is controlled by the flag -da-delinearize. // // We should pay some careful attention to the possibility of integer overflow // in the implementation of the various tests.
This could happen with Add, @@ -61,6 +61,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications"); STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); +static cl::opt<bool> +Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Try to delinearize array references.")); + //===----------------------------------------------------------------------===// // basics @@ -508,7 +513,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, APInt Xr = Xtop; // though they're just going to be overwritten APInt::sdivrem(Xtop, Xbot, Xq, Xr); APInt Yq = Ytop; - APInt Yr = Ytop;; + APInt Yr = Ytop; APInt::sdivrem(Ytop, Ybot, Yq, Yr); if (Xr != 0 || Yr != 0) { X->setEmpty(); @@ -2951,6 +2956,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, AddRec->getLoop(), AddRec->getNoWrapFlags()); } + if (SE->isLoopInvariant(AddRec, TargetLoop)) + return SE->getAddRecExpr(AddRec, + Value, + TargetLoop, + SCEV::FlagAnyWrap); return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), TargetLoop, Value), AddRec->getStepRecurrence(*SE), @@ -2972,7 +2982,7 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, bool DependenceAnalysis::propagate(const SCEV *&Src, const SCEV *&Dst, SmallBitVector &Loops, - SmallVector<Constraint, 4> &Constraints, + SmallVectorImpl<Constraint> &Constraints, bool &Consistent) { bool Result = false; for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { @@ -3166,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, llvm_unreachable("constraint has unexpected kind"); } +/// Check if we can delinearize the subscripts. If the SCEVs representing the +/// source and destination array references are recurrences on a nested loop, +/// this function flattens the nested recurrences into separate recurrences +/// for each loop level. +bool +DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, + SmallVectorImpl<Subscript> &Pair) const { + const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV); + const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); + if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) + return false; + + SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; + SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); + DstAR->delinearize(*SE, DstSubscripts, DstSizes); + + int size = SrcSubscripts.size(); + int dstSize = DstSubscripts.size(); + if (size != dstSize || size < 2) + return false; + +#ifndef NDEBUG + DEBUG(errs() << "\nSrcSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *SrcSubscripts[i]); + DEBUG(errs() << "\nDstSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *DstSubscripts[i]); +#endif + + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization.
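(A worked illustration of this decomposition, using a hypothetical C nest; the shapes below are assumptions of this note, not part of the patch. For

for (int i = 0; i < n; i++)
  for (int j = 1; j < m; j++)
    A[i][j] = A[i][j - 1];

the linearized address A + (i * m + j) forms a single MIV subscript. Once SCEVAddRecExpr::delinearize recovers the subscript vectors (i, j) for the store and (i, j - 1) for the load, Pair holds two entries and the problem decomposes into two cheap SIV tests: i against i, trivially equal, and j against j - 1, a dependence of distance 1 carried by the inner loop.)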
+ Pair.resize(size); + for (int i = 0; i < size; ++i) { + Pair[i].Src = SrcSubscripts[i]; + Pair[i].Dst = DstSubscripts[i]; + + // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the + // delinearization has found, and add these constraints to the dependence + // check to avoid memory accesses overflowing from one dimension into another. + // This is related to the problem of determining the existence of data + // dependences in array accesses using a different number of subscripts: in + // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. + } + + return true; +} //===----------------------------------------------------------------------===// @@ -3275,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinearized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); @@ -3693,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinearized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp index 7620fd9..f042964 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -6,11 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements the CallGraph class and provides the BasicCallGraph -// default implementation. -// -//===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Instructions.h" @@ -21,168 +16,92 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace { +CallGraph::CallGraph() + : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) { + initializeCallGraphPass(*PassRegistry::getPassRegistry()); +} -//===----------------------------------------------------------------------===// -// BasicCallGraph class definition -// -class BasicCallGraph : public ModulePass, public CallGraph { - // Root is root of the call graph, or the external node if a 'main' function - // couldn't be found. - // - CallGraphNode *Root; - - // ExternalCallingNode - This node has edges to all external functions and - // those internal functions that have their address taken. - CallGraphNode *ExternalCallingNode; - - // CallsExternalNode - This node has edges to it from all functions making - // indirect calls or calling an external function. - CallGraphNode *CallsExternalNode; - -public: - static char ID; // Class identification, replacement for typeinfo - BasicCallGraph() : ModulePass(ID), Root(0), - ExternalCallingNode(0), CallsExternalNode(0) { - initializeBasicCallGraphPass(*PassRegistry::getPassRegistry()); - } +void CallGraph::addToCallGraph(Function *F) { + CallGraphNode *Node = getOrInsertFunction(F); - // runOnModule - Compute the call graph for the specified module.
- virtual bool runOnModule(Module &M) { - CallGraph::initialize(M); - - ExternalCallingNode = getOrInsertFunction(0); - CallsExternalNode = new CallGraphNode(0); - Root = 0; - - // Add every function to the call graph. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - addToCallGraph(I); - - // If we didn't find a main function, use the external call graph node - if (Root == 0) Root = ExternalCallingNode; - - return false; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } + // If this function has external linkage, anything could call it. + if (!F->hasLocalLinkage()) { + ExternalCallingNode->addCalledFunction(CallSite(), Node); - virtual void print(raw_ostream &OS, const Module *) const { - OS << "CallGraph Root is: "; - if (Function *F = getRoot()->getFunction()) - OS << F->getName() << "\n"; - else { - OS << "<<null function: 0x" << getRoot() << ">>\n"; + // Found the entry point? + if (F->getName() == "main") { + if (Root) // Found multiple external mains? Don't pick one. + Root = ExternalCallingNode; + else + Root = Node; // Found a main, keep track of it! } - - CallGraph::print(OS, 0); } - virtual void releaseMemory() { - destroy(); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it should - /// override this to adjust the this pointer as needed for the specified pass - /// info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &CallGraph::ID) - return (CallGraph*)this; - return this; - } - - CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } - CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } - - // getRoot - Return the root of the call graph, which is either main, or if - // main cannot be found, the external node. - // - CallGraphNode *getRoot() { return Root; } - const CallGraphNode *getRoot() const { return Root; } - -private: - //===--------------------------------------------------------------------- - // Implementation of CallGraph construction - // - - // addToCallGraph - Add a function to the call graph, and link the node to all - // of the functions that it calls. - // - void addToCallGraph(Function *F) { - CallGraphNode *Node = getOrInsertFunction(F); - - // If this function has external linkage, anything could call it. - if (!F->hasLocalLinkage()) { - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // Found the entry point? - if (F->getName() == "main") { - if (Root) // Found multiple external mains? Don't pick one. - Root = ExternalCallingNode; - else - Root = Node; // Found a main, keep track of it! + // If this function has its address taken, anything could call it. + if (F->hasAddressTaken()) + ExternalCallingNode->addCalledFunction(CallSite(), Node); + + // If this function is not defined in this translation unit, it could call + // anything. + if (F->isDeclaration() && !F->isIntrinsic()) + Node->addCalledFunction(CallSite(), CallsExternalNode); + + // Look for calls by this function. + for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { + CallSite CS(cast<Value>(II)); + if (CS) { + const Function *Callee = CS.getCalledFunction(); + if (!Callee) + // Indirect calls of intrinsics are not allowed so no need to check. 
+ Node->addCalledFunction(CS, CallsExternalNode); + else if (!Callee->isIntrinsic()) + Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } } +} - // If this function has its address taken, anything could call it. - if (F->hasAddressTaken()) - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // If this function is not defined in this translation unit, it could call - // anything. - if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode); - - // Look for calls by this function. - for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) { - CallSite CS(cast<Value>(II)); - if (CS) { - const Function *Callee = CS.getCalledFunction(); - if (!Callee) - // Indirect calls of intrinsics are not allowed so no need to check. - Node->addCalledFunction(CS, CallsExternalNode); - else if (!Callee->isIntrinsic()) - Node->addCalledFunction(CS, getOrInsertFunction(Callee)); - } - } - } +void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} - // - // destroy - Release memory for the call graph - virtual void destroy() { - /// CallsExternalNode is not in the function map, delete it explicitly. - if (CallsExternalNode) { - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; - CallsExternalNode = 0; - } - CallGraph::destroy(); - } -}; +bool CallGraph::runOnModule(Module &M) { + Mod = &M; -} //End anonymous namespace + ExternalCallingNode = getOrInsertFunction(0); + assert(!CallsExternalNode); + CallsExternalNode = new CallGraphNode(0); + Root = 0; -INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph) -INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg", - "Basic CallGraph Construction", false, true, true) + // Add every function to the call graph. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); -char CallGraph::ID = 0; -char BasicCallGraph::ID = 0; + // If we didn't find a main function, use the external call graph node + if (Root == 0) + Root = ExternalCallingNode; -void CallGraph::initialize(Module &M) { - Mod = &M; + return false; } -void CallGraph::destroy() { - if (FunctionMap.empty()) return; - - // Reset all node's use counts to zero before deleting them to prevent an - // assertion from firing. +INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true) + +char CallGraph::ID = 0; + +void CallGraph::releaseMemory() { + /// CallsExternalNode is not in the function map, delete it explicitly. + if (CallsExternalNode) { + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + CallsExternalNode = 0; + } + + if (FunctionMap.empty()) + return; + +// Reset all node's use counts to zero before deleting them to prevent an +// assertion from firing. 
#ifndef NDEBUG for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); I != E; ++I) @@ -195,7 +114,14 @@ void CallGraph::destroy() { FunctionMap.clear(); } -void CallGraph::print(raw_ostream &OS, Module*) const { +void CallGraph::print(raw_ostream &OS, const Module*) const { + OS << "CallGraph Root is: "; + if (Function *F = Root->getFunction()) + OS << F->getName() << "\n"; + else { + OS << "<<null function: 0x" << Root << ">>\n"; + } + for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index a0d788f..182beca 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/PassManagers.h" +#include "llvm/IR/LegacyPassManagers.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp index 92d0d23..7ec4644 100644 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0; INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp index 1c1816d..47357cf 100644 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -19,8 +19,7 @@ using namespace llvm; /// initializeIPA - Initialize all passes linked into the IPA library. void llvm::initializeIPA(PassRegistry &Registry) { - initializeBasicCallGraphPass(Registry); - initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPass(Registry); initializeCallGraphPrinterPass(Registry); initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp index 35c45e6..3bc796e 100644 --- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ExposesReturnsTwice; bool HasDynamicAlloca; bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; /// Number of bytes allocated statically by the callee. 
uint64_t AllocatedSize; @@ -124,7 +126,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitIntToPtr(IntToPtrInst &I); bool visitCastInst(CastInst &I); bool visitUnaryInstruction(UnaryInstruction &I); - bool visitICmp(ICmpInst &I); + bool visitCmpInst(CmpInst &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); @@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitUnreachableInst(UnreachableInst &I); public: CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, @@ -139,12 +147,13 @@ public: : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -490,7 +499,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { return false; } -bool CallAnalyzer::visitICmp(ICmpInst &I) { +bool CallAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. if (!isa<Constant>(LHS)) @@ -499,12 +508,16 @@ bool CallAnalyzer::visitICmp(ICmpInst &I) { if (!isa<Constant>(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; return true; } + } + + if (I.getOpcode() == Instruction::FCmp) + return false; // Otherwise look for a comparison between constant offset pointers with // a common base. @@ -700,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { } bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && + if (CS.hasFnAttr(Attribute::ReturnsTwice) && !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice)) { // This aborts the entire analysis. @@ -781,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return Base::visitCallSite(CS); } +bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { + // At least one return instruction will be free after inlining. 
+ bool Free = !HasReturn; + HasReturn = true; + return Free; +} + +bool CallAnalyzer::visitBranchInst(BranchInst &BI) { + // We model unconditional branches as essentially free -- they really + // shouldn't exist at all, but handling them makes the behavior of the + // inliner more regular and predictable. Interestingly, conditional branches + // which will fold away are also free. + return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(BI.getCondition())); +} + +bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { + // We model unconditional switches as free, see the comments on handling + // branches. + return isa<ConstantInt>(SI.getCondition()) || + dyn_cast_or_null<ConstantInt>( + SimplifiedValues.lookup(SI.getCondition())); +} + +bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddresses (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions with + // indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably don't + // want to inline this function. + HasIndirectBr = true; + return false; +} + +bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a resume instruction. + return false; +} + +bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { + // FIXME: It might be reasonable to discount the cost of instructions leading + // to unreachable as they have the lowest possible impact on both runtime and + // code size. + return true; // No actual code is needed for unreachable. +} + bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. @@ -804,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { ++NumInstructions; if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -821,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If visiting this instruction detected an uninlinable pattern, abort. - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -985,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining.
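(To make the "free after simplification" rule above concrete, a hedged IR-level example with hypothetical values: if the analyzed call binds an argument so that SimplifiedValues maps %flag to i1 true, then

br i1 %flag, label %then, label %else

is charged nothing by visitBranchInst, since the branch folds to an unconditional one during inlining; 'br label %then' and a switch on a constant condition are treated the same way.)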
- bool HasReturn = false; - // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1035,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (BB->empty()) continue; - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa<IndirectBrInst>(TI)) - return false; - - if (!HasReturn && isa<ReturnInst>(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail out. if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -1074,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { break; } + TerminatorInst *TI = BB->getTerminator(); + // Add in the live successors by first checking whether we have a terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -1167,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); } +/// \brief Test that two functions either both have or both lack the given +/// attribute. +static bool attributeMatches(Function *F1, Function *F2, + Attribute::AttrKind Attr) { + return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +} + +/// \brief Test that there are no attribute conflicts between Caller and Callee +/// that prevent inlining. +static bool functionsHaveCompatibleAttributes(Function *Caller, + Function *Callee) { + return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && + attributeMatches(Caller, Callee, Attribute::SanitizeThread); +} + InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, int Threshold) { // Cannot inline indirect calls. @@ -1175,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Calls to functions with always-inline attributes should be inlined // whenever possible.
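(Usage sketch for the compatibility check above, with hypothetical functions: a callee compiled with -fsanitize=address carries the SanitizeAddress attribute, and inlining it into a non-sanitized caller, or vice versa, would silently add or drop instrumentation; hence attributeMatches() must hold for each sanitizer kind before inlining proceeds, which the diff below wires in as:

if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
  return llvm::InlineCost::getNever();
)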
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline)) { + if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { if (isInlineViable(*Callee)) return llvm::InlineCost::getAlways(); return llvm::InlineCost::getNever(); } + // Never inline functions with conflicting attributes (unless callee has + // always-inline attribute). + if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee)) + return llvm::InlineCost::getNever(); + + // Don't inline this call if the caller has the optnone attribute. + if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) + return llvm::InlineCost::getNever(); + // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. if (Callee->mayBeOverridden() || - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoInline) || - CS.isNoInline()) + Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index bf77451..b867af1 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, - Value *&V) { + Value *&V, + bool AllowNonInbounds = false) { assert(V->getType()->getScalarType()->isPointerTy()); // Without DataLayout, just be conservative for now. Theoretically, more could @@ -676,8 +677,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, if (!TD) return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - APInt Offset = APInt::getNullValue(IntPtrWidth); + Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType(); + APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. 
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) + if ((!AllowNonInbounds && !GEP->isInBounds()) || + !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -701,7 +703,6 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) return ConstantVector::getSplat(V->getType()->getVectorNumElements(), @@ -1363,6 +1364,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) return V; + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + // undef >>l X -> 0 if (match(Op0, m_Undef())) return Constant::getNullValue(Op0->getType()); @@ -1391,6 +1396,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) return V; + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + // all ones >>a X -> all ones if (match(Op0, m_AllOnes())) return Op0; @@ -1730,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD, RHS = RHS->stripPointerCasts(); // A non-null pointer is not equal to a null pointer. - if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) && + if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); @@ -1830,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD, return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); } + + // Even if a non-inbounds GEP occurs along the path we can still optimize + // equality comparisons concerning the result. We avoid walking the whole + // chain again by starting where the last calls to + // stripAndComputeConstantOffsets left off and accumulating the offsets. + Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true); + Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true); + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, + ConstantExpr::getAdd(LHSOffset, LHSNoBound), + ConstantExpr::getAdd(RHSOffset, RHSNoBound)); } // Otherwise, fail. @@ -2026,7 +2046,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && - Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant.
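(A worked example for the non-inbounds extension above, with hypothetical IR in this release's typed-pointer syntax:

%p = getelementptr i8* %base, i64 4   ; note: not inbounds
%q = getelementptr i8* %base, i64 8

Both strip to the same base %base with accumulated offsets 4 and 8, so an equality compare of %p and %q folds to a compare of the two offset constants, i.e. icmp eq %p, %q folds to false; before AllowNonInbounds these GEPs would have been left alone.)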
if (Value *V = SimplifyICmpInst(Pred, SrcOp, @@ -2238,6 +2258,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { bool KnownNonNegative, KnownNegative; switch (Pred) { @@ -2245,7 +2266,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2255,7 +2276,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2265,6 +2286,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); } } + + // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { bool KnownNonNegative, KnownNegative; switch (Pred) { @@ -2272,7 +2295,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2282,7 +2305,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2936,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: + case Intrinsic::round: return true; } } diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 66b5e85..b6970af 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -421,8 +421,8 @@ void LVIValueHandle::deleted() { if (I->second == getValPtr()) ToErase.push_back(*I); } - - for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + + for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) Parent->OverDefinedCache.erase(*I); @@ -444,8 +444,8 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { if (I->first == BB) ToErase.push_back(*I); } - - for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + + for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) OverDefinedCache.erase(*I); diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 9393508..ec17f47 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) { &I); FunctionType *FT = F->getFunctionType(); - unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + unsigned NumActualArgs = CS.arg_size(); Assert1(FT->isVarArg() ? 
FT->getNumParams() <= NumActualArgs : @@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, DataLayout *TD) { +static bool isZero(Value *V, DataLayout *DL) { // Assume undef could be zero. - if (isa<UndefValue>(V)) return true; + if (isa<UndefValue>(V)) + return true; + + VectorType *VecTy = dyn_cast<VectorType>(V->getType()); + if (!VecTy) { + unsigned BitWidth = V->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); + return KnownZero.isAllOnesValue(); + } + + // Per-component check doesn't work with zeroinitializer + Constant *C = dyn_cast<Constant>(V); + if (!C) + return false; + + if (C->isZeroValue()) + return true; + + // For a vector, KnownZero will only be true if all values are zero, so check + // this per component + unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); + for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { + Constant *Elem = C->getAggregateElement(I); + if (isa<UndefValue>(Elem)) + return true; + + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + if (KnownZero.isAllOnesValue()) + return true; + } - unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD); - return KnownZero.isAllOnesValue(); + return false; } void Lint::visitSDiv(BinaryOperator &I) { diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index f1ad650..e369633 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -50,6 +50,9 @@ INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) +// Loop identifier metadata name. +static const char *const LoopMDName = "llvm.loop"; + //===----------------------------------------------------------------------===// // Loop implementation // @@ -174,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool Loop::isLCSSAForm(DominatorTree &DT) const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end()); - for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { BasicBlock *BB = *BI; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) @@ -193,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { // block they are defined in. Also, blocks not reachable from the // entry are special; uses in them don't need to go through PHIs. if (UserBB != BB && - !LoopBBs.count(UserBB) && + !contains(UserBB) && DT.isReachableFromEntry(UserBB)) return false; } @@ -217,12 +216,12 @@ bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. 
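(Behavior sketch of the vector-aware isZero above, values hypothetical: a divisor of <2 x i32> <i32 4, i32 0> is flagged because one lane is provably zero, and <2 x i32> <i32 4, i32 undef> likewise, since an undef lane is assumed possibly zero; a non-constant vector divisor is conservatively not flagged, because ComputeMaskedBits over the whole vector can only prove all lanes zero at once, not a single offending lane.)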
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa<IndirectBrInst>((*I)->getTerminator())) { + if (isa<IndirectBrInst>((*I)->getTerminator())) return false; - } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) { + + if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) if (II->hasFnAttr(Attribute::NoDuplicate)) return false; - } for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { if (const CallInst *CI = dyn_cast<CallInst>(BI)) { @@ -234,14 +233,62 @@ bool Loop::isSafeToClone() const { return true; } -bool Loop::isAnnotatedParallel() const { +MDNode *Loop::getLoopID() const { + MDNode *LoopID = 0; + if (isLoopSimplifyForm()) { + LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); + } else { + // Go through each predecessor of the loop header and check the + // terminator for the metadata. + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + MDNode *MD = 0; + + // Check if this terminator branches to the loop header. + for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) { + MD = TI->getMetadata(LoopMDName); + break; + } + } + if (!MD) + return 0; - BasicBlock *latch = getLoopLatch(); - if (latch == NULL) - return false; + if (!LoopID) + LoopID = MD; + else if (MD != LoopID) + return 0; + } + } + if (!LoopID || LoopID->getNumOperands() == 0 || + LoopID->getOperand(0) != LoopID) + return 0; + return LoopID; +} - MDNode *desiredLoopIdMetadata = - latch->getTerminator()->getMetadata("llvm.loop.parallel"); +void Loop::setLoopID(MDNode *LoopID) const { + assert(LoopID && "Loop ID should not be null"); + assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself"); + + if (isLoopSimplifyForm()) { + getLoopLatch()->getTerminator()->setMetadata(LoopMDName, LoopID); + return; + } + + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) + TI->setMetadata(LoopMDName, LoopID); + } + } +} + +bool Loop::isAnnotatedParallel() const { + MDNode *desiredLoopIdMetadata = getLoopID(); if (!desiredLoopIdMetadata) return false; @@ -258,15 +305,15 @@ bool Loop::isAnnotatedParallel() const { if (!II->mayReadOrWriteMemory()) continue; - if (!II->getMetadata("llvm.mem.parallel_loop_access")) - return false; - // The memory instruction can refer to the loop identifier metadata // directly or indirectly through another list metadata (in case of // nested parallel loops). The loop identifier metadata refers to // itself so we can check both cases with the same routine. - MDNode *loopIdMD = - dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access")); + MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access"); + + if (!loopIdMD) + return false; + bool loopIdMDFound = false; for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { @@ -286,9 +333,6 @@ bool Loop::isAnnotatedParallel() const { /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. 
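The getLoopID()/setLoopID() pair added above relies on one structural invariant, which isAnnotatedParallel() also leans on: a loop identifier is an MDNode whose operand 0 is the node itself, so the same pointer comparison works whether a memory access references the loop ID directly or through a list of IDs. A minimal standalone model of that validity test (plain C++, invented names, not part of the patch):

    #include <vector>

    // Toy stand-in for MDNode: a node is just an operand list.
    struct Node { std::vector<const Node *> Ops; };

    // Mirrors the check in getLoopID(): at least one operand, and operand 0
    // must point back at the node itself.
    static bool isValidLoopID(const Node *ID) {
      return ID && !ID->Ops.empty() && ID->Ops[0] == ID;
    }

    int main() {
      Node ID;
      ID.Ops.push_back(&ID); // self-referential, making the node unique
      Node Bogus;            // no operands: rejected
      return (isValidLoopID(&ID) && !isValidLoopID(&Bogus)) ? 0 : 1;
    }

The self-reference is what lets a single pointer-equality test identify the loop from arbitrarily nested metadata lists.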
bool Loop::hasDedicatedExits() const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); // Each predecessor of each exit block of a normal loop is contained // within the loop. SmallVector<BasicBlock *, 4> ExitBlocks; @@ -296,7 +340,7 @@ bool Loop::hasDedicatedExits() const { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) for (pred_iterator PI = pred_begin(ExitBlocks[i]), PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) - if (!LoopBBs.count(*PI)) + if (!contains(*PI)) return false; // All the requirements are met. return true; @@ -311,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { assert(hasDedicatedExits() && "getUniqueExitBlocks assumes the loop has canonical form exits!"); - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end()); - std::sort(LoopBBs.begin(), LoopBBs.end()); - SmallVector<BasicBlock *, 32> switchExitBlocks; for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { @@ -325,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) { // If block is inside the loop then it is not a exit block. - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + if (contains(*I)) continue; pred_iterator PI = pred_begin(*I); diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 1540112..acf2ba6 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -188,6 +188,10 @@ bool LPPassManager::runOnFunction(Function &F) { // advantage in deleting uses in a later loop before optimizing the // definitions in an earlier loop. If we find a clear reason to process in // forward order, then a forward variant of LoopPassManager should be created. + // + // Note that LoopInfo::iterator visits loops in reverse program + // order. Here, reverse_iterator gives us a forward order, and the LoopQueue + // reverses the order a third time by popping from the back. 
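To make the LoopPass.cpp iteration-order comment above concrete, here is a small self-contained sketch of the three reversals (plain C++, not patch code; the loop names are invented):

    #include <deque>
    #include <iostream>
    #include <vector>

    int main() {
      // Assume LoopInfo hands out top-level loops in reverse program order:
      // C, B, A for a function whose source order is A, B, C.
      std::vector<const char *> LI = {"C", "B", "A"};
      std::deque<const char *> LQ;
      // rbegin()/rend() walks A, B, C (forward program order)...
      for (auto I = LI.rbegin(); I != LI.rend(); ++I)
        LQ.push_back(*I);
      // ...and popping from the back restores reverse program order: C, B, A.
      while (!LQ.empty()) {
        std::cout << LQ.back() << "\n";
        LQ.pop_back();
      }
      return 0;
    }

Net effect: later loops are still popped and processed before earlier ones. As the comment notes, there is no strong argument for either order, but spelling out the double-reversal-plus-pop prevents surprises.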
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) addLoopIntoQueue(*I, LQ); diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 9c0d8ac..1db0f63 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -31,12 +31,13 @@ using namespace llvm; enum AllocType { - MallocLike = 1<<0, // allocates - CallocLike = 1<<1, // allocates + bzero - ReallocLike = 1<<2, // reallocates - StrDupLike = 1<<3, + OpNewLike = 1<<0, // allocates; never returns null + MallocLike = 1<<1 | OpNewLike, // allocates; may return null + CallocLike = 1<<2, // allocates + bzero + ReallocLike = 1<<3, // reallocates + StrDupLike = 1<<4, AllocLike = MallocLike | CallocLike | StrDupLike, - AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike + AnyAlloc = AllocLike | ReallocLike }; struct AllocFnsTy { @@ -52,20 +53,20 @@ struct AllocFnsTy { static const AllocFnsTy AllocationFnData[] = { {LibFunc::malloc, MallocLike, 1, 0, -1}, {LibFunc::valloc, MallocLike, 1, 0, -1}, - {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int) {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) - {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long) {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) - {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int) {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) - {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) - {LibFunc::posix_memalign, MallocLike, 3, 2, -1}, {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, {LibFunc::strdup, StrDupLike, 1, -1, -1}, {LibFunc::strndup, StrDupLike, 2, 1, -1} + // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; @@ -77,6 +78,9 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { if (!CS.getInstruction()) return 0; + if (CS.isNoBuiltin()) + return 0; + Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) return 0; @@ -114,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, return 0; const AllocFnsTy *FnData = &AllocationFnData[i]; - if ((FnData->AllocTy & AllocTy) == 0) + if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) return 0; // Check function prototype. @@ -186,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); } +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory and never returns null (such as operator new). +bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. 
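One subtlety in the MemoryBuiltins.cpp changes above: OpNewLike is folded into MallocLike bit-wise (MallocLike = 1<<1 | OpNewLike), and getAllocationData() now requires every bit of the function's class to be present in the query (the `(FnData->AllocTy & AllocTy) != FnData->AllocTy` test rejects otherwise). So operator new still answers malloc-like queries, while the new isOperatorNewLikeFn() query matches only functions that never return null. A standalone sketch of the subset test (plain C++, mirroring the enum values in the hunk):

    #include <cassert>

    enum AllocType {
      OpNewLike   = 1 << 0,              // allocates; never returns null
      MallocLike  = 1 << 1 | OpNewLike,  // allocates; may return null
      CallocLike  = 1 << 2,
      ReallocLike = 1 << 3,
      StrDupLike  = 1 << 4,
      AllocLike   = MallocLike | CallocLike | StrDupLike,
      AnyAlloc    = AllocLike | ReallocLike
    };

    // A function of class FnTy satisfies a query iff FnTy is a subset of it.
    static bool matches(AllocType FnTy, AllocType Query) {
      return (FnTy & Query) == FnTy;
    }

    int main() {
      assert(matches(OpNewLike, MallocLike));  // new is still malloc-like
      assert(!matches(MallocLike, OpNewLike)); // malloc may return null
      assert(!matches(CallocLike, MallocLike));
      assert(matches(StrDupLike, AnyAlloc));
      return 0;
    }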
@@ -194,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I, return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0; } -static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, +static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) @@ -202,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized() || !TD) + if (!T || !T->isSized() || !DL) return 0; - unsigned ElementSize = TD->getTypeAllocSize(T); + unsigned ElementSize = DL->getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) - ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); + ElementSize = DL->getStructLayout(ST)->getSizeInBytes(); // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. @@ -224,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. const CallInst *llvm::isArrayMalloc(const Value *I, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI) { const CallInst *CI = extractMallocCall(I, TLI); - Value *ArraySize = computeArraySize(CI, TD, TLI); + Value *ArraySize = computeArraySize(CI, DL, TLI); if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize)) if (ConstSize->isOne()) @@ -285,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. -Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD, +Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt) { assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, TD, TLI, LookThroughSExt); + return computeArraySize(CI, DL, TLI, LookThroughSExt); } @@ -315,9 +326,15 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return 0; - if (TLIFn != LibFunc::free && - TLIFn != LibFunc::ZdlPv && // operator delete(void*) - TLIFn != LibFunc::ZdaPv) // operator delete[](void*) + unsigned ExpectedNumParams; + if (TLIFn == LibFunc::free || + TLIFn == LibFunc::ZdlPv || // operator delete(void*) + TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + ExpectedNumParams = 1; + else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + ExpectedNumParams = 2; + else return 0; // Check free prototype. @@ -326,7 +343,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) return 0; - if (FTy->getNumParams() != 1) + if (FTy->getNumParams() != ExpectedNumParams) return 0; if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) return 0; @@ -345,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { /// object size in Size if successful, and false otherwise. 
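The isFreeCall() hunk just above now also recognizes the nothrow operator delete overloads, which take a second std::nothrow_t argument, hence the per-function ExpectedNumParams check. A hedged sketch of the mapping (plain C++; the mangled spellings below are the Itanium-ABI symbol names the LibFunc enumerators correspond to):

    #include <cstring>

    // Returns the parameter count a free-like callee must have, or -1 if the
    // name is not recognized as free-like at all.
    static int expectedFreeParams(const char *Name) {
      if (!strcmp(Name, "free") ||
          !strcmp(Name, "_ZdlPv") ||   // operator delete(void*)
          !strcmp(Name, "_ZdaPv"))     // operator delete[](void*)
        return 1;
      if (!strcmp(Name, "_ZdlPvRKSt9nothrow_t") || // delete(void*, nothrow)
          !strcmp(Name, "_ZdaPvRKSt9nothrow_t"))   // delete[](void*, nothrow)
        return 2;
      return -1;
    }

    int main() {
      return (expectedFreeParams("_ZdaPvRKSt9nothrow_t") == 2 &&
              expectedFreeParams("malloc") == -1) ? 0 : 1;
    }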
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas, /// byval arguments, and global variables. -bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL, const TargetLibraryInfo *TLI, bool RoundToAlign) { - if (!TD) + if (!DL) return false; - ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign); SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); if (!Visitor.bothKnown(Data)) return false; @@ -377,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { return Size; } -ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign) -: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = TD->getIntPtrType(Context); +: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { + IntegerType *IntTy = DL->getIntPtrType(Context); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); } @@ -425,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); - APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType())); if (!I.isArrayAllocation()) return std::make_pair(align(Size, I.getAlignment()), Zero); @@ -444,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { return unknown(); } PointerType *PT = cast<PointerType>(A.getType()); - APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType())); return std::make_pair(align(Size, A.getParamAlignment()), Zero); } @@ -517,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); APInt Offset(IntTyBits, 0); - if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset)) + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset)) return unknown(); return std::make_pair(PtrData.first, PtrData.second + Offset); @@ -533,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); - APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType())); return std::make_pair(align(Size, GV.getAlignment()), Zero); } @@ -569,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { return unknown(); } - -ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD, - const TargetLibraryInfo *TLI, - LLVMContext &Context) -: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { - IntTy = TD->getIntPtrType(Context); +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, + const TargetLibraryInfo *TLI, + LLVMContext &Context, + bool RoundToAlign) +: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), + RoundToAlign(RoundToAlign) { + IntTy = DL->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -598,7 +616,7 @@ 
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { - ObjectSizeOffsetVisitor Visitor(TD, TLI, Context); + ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), @@ -617,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) Builder.SetInsertPoint(I); - // record the pointers that were handled in this run, so that they can be - // cleaned later if something fails - SeenVals.insert(V); - // now compute the size and offset SizeOffsetEvalType Result; - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + + // Record the pointers that were handled in this run, so that they can be + // cleaned later if something fails. We also use this set to break cycles that + // can occur in dead code. + if (!SeenVals.insert(V)) { + Result = unknown(); + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { Result = visitGEPOperator(*GEP); } else if (Instruction *I = dyn_cast<Instruction>(V)) { Result = visit(*I); @@ -656,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { assert(I.isArrayAllocation()); Value *ArraySize = I.getArraySize(); Value *Size = ConstantInt::get(ArraySize->getType(), - TD->getTypeAllocSize(I.getAllocatedType())); + DL->getTypeAllocSize(I.getAllocatedType())); Size = Builder.CreateMul(Size, ArraySize); return std::make_pair(Size, Zero); } @@ -708,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { if (!bothKnown(PtrData)) return unknown(); - Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true); + Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true); Offset = Builder.CreateAdd(PtrData.second, Offset); return std::make_pair(PtrData.first, Offset); } diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c0009cb..84ff2ee 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===// +//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===// // // The LLVM Compiler Infrastructure // @@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis<AliasAnalysis>(); TD = getAnalysisIfAvailable<DataLayout>(); DT = getAnalysisIfAvailable<DominatorTree>(); - if (PredCache == 0) + if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; } @@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { + Instruction *Inst = --ScanIt; + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + // Debug intrinsics don't (and can't) cause dependencies. + if (isa<DbgInfoIntrinsic>(II)) continue; + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = --ScanIt; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - // Debug intrinsics don't (and can't) cause dependences. 
- if (isa<DbgInfoIntrinsic>(II)) continue; - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp deleted file mode 100644 index 30d213b..0000000 --- a/contrib/llvm/lib/Analysis/PathNumbering.cpp +++ /dev/null @@ -1,521 +0,0 @@ -//===- PathNumbering.cpp --------------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Ball-Larus path numbers uniquely identify paths through a directed acyclic -// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony -// edges to obtain a DAG, and thus the unique path numbers [Ball96]. -// -// The purpose of this analysis is to enumerate the edges in a CFG in order -// to obtain paths from path numbers in a convenient manner. As described in -// [Ball96] edges can be enumerated such that given a path number by following -// the CFG and updating the path number, the path is obtained. -// -// [Ball96] -// T. Ball and J. R. Larus. "Efficient Path Profiling." -// International Symposium on Microarchitecture, pages 46-57, 1996. -// http://portal.acm.org/citation.cfm?id=243857 -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ball-larus-numbering" - -#include "llvm/Analysis/PathNumbering.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/TypeBuilder.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <queue> -#include <sstream> -#include <stack> -#include <string> -#include <utility> - -using namespace llvm; - -// Are we enabling early termination -static cl::opt<bool> ProcessEarlyTermination( - "path-profile-early-termination", cl::Hidden, - cl::desc("In path profiling, insert extra instrumentation to account for " - "unexpected function termination.")); - -// Returns the basic block for the BallLarusNode -BasicBlock* BallLarusNode::getBlock() { - return(_basicBlock); -} - -// Returns the number of paths to the exit starting at the node. -unsigned BallLarusNode::getNumberPaths() { - return(_numberPaths); -} - -// Sets the number of paths to the exit starting at the node. -void BallLarusNode::setNumberPaths(unsigned numberPaths) { - _numberPaths = numberPaths; -} - -// Gets the NodeColor used in graph algorithms. -BallLarusNode::NodeColor BallLarusNode::getColor() { - return(_color); -} - -// Sets the NodeColor used in graph algorithms. -void BallLarusNode::setColor(BallLarusNode::NodeColor color) { - _color = color; -} - -// Returns an iterator over predecessor edges. Includes phony and -// backedges. -BLEdgeIterator BallLarusNode::predBegin() { - return(_predEdges.begin()); -} - -// Returns the end sentinel for the predecessor iterator. -BLEdgeIterator BallLarusNode::predEnd() { - return(_predEdges.end()); -} - -// Returns the number of predecessor edges. Includes phony and -// backedges. 
-unsigned BallLarusNode::getNumberPredEdges() { - return(_predEdges.size()); -} - -// Returns an iterator over successor edges. Includes phony and -// backedges. -BLEdgeIterator BallLarusNode::succBegin() { - return(_succEdges.begin()); -} - -// Returns the end sentinel for the successor iterator. -BLEdgeIterator BallLarusNode::succEnd() { - return(_succEdges.end()); -} - -// Returns the number of successor edges. Includes phony and -// backedges. -unsigned BallLarusNode::getNumberSuccEdges() { - return(_succEdges.size()); -} - -// Add an edge to the predecessor list. -void BallLarusNode::addPredEdge(BallLarusEdge* edge) { - _predEdges.push_back(edge); -} - -// Remove an edge from the predecessor list. -void BallLarusNode::removePredEdge(BallLarusEdge* edge) { - removeEdge(_predEdges, edge); -} - -// Add an edge to the successor list. -void BallLarusNode::addSuccEdge(BallLarusEdge* edge) { - _succEdges.push_back(edge); -} - -// Remove an edge from the successor list. -void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) { - removeEdge(_succEdges, edge); -} - -// Returns the name of the BasicBlock being represented. If BasicBlock -// is null then returns "<null>". If BasicBlock has no name, then -// "<unnamed>" is returned. Intended for use with debug output. -std::string BallLarusNode::getName() { - std::stringstream name; - - if(getBlock() != NULL) { - if(getBlock()->hasName()) { - std::string tempName(getBlock()->getName()); - name << tempName.c_str() << " (" << _uid << ")"; - } else - name << "<unnamed> (" << _uid << ")"; - } else - name << "<null> (" << _uid << ")"; - - return name.str(); -} - -// Removes an edge from an edgeVector. Used by removePredEdge and -// removeSuccEdge. -void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) { - // TODO: Avoid linear scan by using a set instead - for(BLEdgeIterator i = v.begin(), - end = v.end(); - i != end; - ++i) { - if((*i) == e) { - v.erase(i); - break; - } - } -} - -// Returns the source node of this edge. -BallLarusNode* BallLarusEdge::getSource() const { - return(_source); -} - -// Returns the target node of this edge. -BallLarusNode* BallLarusEdge::getTarget() const { - return(_target); -} - -// Sets the type of the edge. -BallLarusEdge::EdgeType BallLarusEdge::getType() const { - return _edgeType; -} - -// Gets the type of the edge. -void BallLarusEdge::setType(EdgeType type) { - _edgeType = type; -} - -// Returns the weight of this edge. Used to decode path numbers to sequences -// of basic blocks. -unsigned BallLarusEdge::getWeight() { - return(_weight); -} - -// Sets the weight of the edge. Used during path numbering. -void BallLarusEdge::setWeight(unsigned weight) { - _weight = weight; -} - -// Gets the phony edge originating at the root. -BallLarusEdge* BallLarusEdge::getPhonyRoot() { - return _phonyRoot; -} - -// Sets the phony edge originating at the root. -void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) { - _phonyRoot = phonyRoot; -} - -// Gets the phony edge terminating at the exit. -BallLarusEdge* BallLarusEdge::getPhonyExit() { - return _phonyExit; -} - -// Sets the phony edge terminating at the exit. -void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) { - _phonyExit = phonyExit; -} - -// Gets the associated real edge if this is a phony edge. -BallLarusEdge* BallLarusEdge::getRealEdge() { - return _realEdge; -} - -// Sets the associated real edge if this is a phony edge. 
-void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) { - _realEdge = realEdge; -} - -// Returns the duplicate number of the edge. -unsigned BallLarusEdge::getDuplicateNumber() { - return(_duplicateNumber); -} - -// Initialization that requires virtual functions which are not fully -// functional in the constructor. -void BallLarusDag::init() { - BLBlockNodeMap inDag; - std::stack<BallLarusNode*> dfsStack; - - _root = addNode(&(_function.getEntryBlock())); - _exit = addNode(NULL); - - // start search from root - dfsStack.push(getRoot()); - - // dfs to add each bb into the dag - while(dfsStack.size()) - buildNode(inDag, dfsStack); - - // put in the final edge - addEdge(getExit(),getRoot(),0); -} - -// Frees all memory associated with the DAG. -BallLarusDag::~BallLarusDag() { - for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end; - ++edge) - delete (*edge); - - for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end; - ++node) - delete (*node); -} - -// Calculate the path numbers by assigning edge increments as prescribed -// in Ball-Larus path profiling. -void BallLarusDag::calculatePathNumbers() { - BallLarusNode* node; - std::queue<BallLarusNode*> bfsQueue; - bfsQueue.push(getExit()); - - while(bfsQueue.size() > 0) { - node = bfsQueue.front(); - - DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n"); - - bfsQueue.pop(); - unsigned prevPathNumber = node->getNumberPaths(); - calculatePathNumbersFrom(node); - - // Check for DAG splitting - if( node->getNumberPaths() > 100000000 && node != getRoot() ) { - // Add new phony edge from the split-node to the DAG's exit - BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); - exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - - // Counters to handle the possibility of a multi-graph - BasicBlock* oldTarget = 0; - unsigned duplicateNumber = 0; - - // Iterate through each successor edge, adding phony edges - for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); - succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) { - - if( (*succ)->getType() == BallLarusEdge::NORMAL ) { - // is this edge a duplicate? - if( oldTarget != (*succ)->getTarget()->getBlock() ) - duplicateNumber = 0; - - // create the new phony edge: root -> succ - BallLarusEdge* rootEdge = - addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++); - rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - rootEdge->setRealEdge(*succ); - - // split on this edge and reference it's exit/root phony edges - (*succ)->setType(BallLarusEdge::SPLITEDGE); - (*succ)->setPhonyRoot(rootEdge); - (*succ)->setPhonyExit(exitEdge); - (*succ)->setWeight(0); - } - } - - calculatePathNumbersFrom(node); - } - - DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", " - << node->getNumberPaths() << ".\n"); - - if(prevPathNumber == 0 && node->getNumberPaths() != 0) { - DEBUG(dbgs() << "node ready : " << node->getName() << "\n"); - for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd(); - pred != end; pred++) { - if( (*pred)->getType() == BallLarusEdge::BACKEDGE || - (*pred)->getType() == BallLarusEdge::SPLITEDGE ) - continue; - - BallLarusNode* nextNode = (*pred)->getSource(); - // not yet visited? - if(nextNode->getNumberPaths() == 0) - bfsQueue.push(nextNode); - } - } - } - - DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n"); -} - -// Returns the number of paths for the Dag. 
-unsigned BallLarusDag::getNumberOfPaths() { - return(getRoot()->getNumberPaths()); -} - -// Returns the root (i.e. entry) node for the DAG. -BallLarusNode* BallLarusDag::getRoot() { - return _root; -} - -// Returns the exit node for the DAG. -BallLarusNode* BallLarusDag::getExit() { - return _exit; -} - -// Returns the function for the DAG. -Function& BallLarusDag::getFunction() { - return(_function); -} - -// Clears the node colors. -void BallLarusDag::clearColors(BallLarusNode::NodeColor color) { - for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++) - (*nodeIt)->setColor(color); -} - -// Processes one node and its imediate edges for building the DAG. -void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { - BallLarusNode* currentNode = dfsStack.top(); - BasicBlock* currentBlock = currentNode->getBlock(); - - if(currentNode->getColor() != BallLarusNode::WHITE) { - // we have already visited this node - dfsStack.pop(); - currentNode->setColor(BallLarusNode::BLACK); - } else { - // are there any external procedure calls? - if( ProcessEarlyTermination ) { - for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(), - bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd; - bbCurrent++ ) { - Instruction& instr = *bbCurrent; - if( instr.getOpcode() == Instruction::Call ) { - BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0); - callEdge->setType(BallLarusEdge::CALLEDGE_PHONY); - break; - } - } - } - - TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); - if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) || - isa<ResumeInst>(terminator)) - addEdge(currentNode, getExit(),0); - - currentNode->setColor(BallLarusNode::GRAY); - inDag[currentBlock] = currentNode; - - BasicBlock* oldSuccessor = 0; - unsigned duplicateNumber = 0; - - // iterate through this node's successors - for(succ_iterator successor = succ_begin(currentBlock), - succEnd = succ_end(currentBlock); successor != succEnd; - oldSuccessor = *successor, ++successor ) { - BasicBlock* succBB = *successor; - - // is this edge a duplicate? - if (oldSuccessor == succBB) - duplicateNumber++; - else - duplicateNumber = 0; - - buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber); - } - } -} - -// Process an edge in the CFG for DAG building. -void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& - dfsStack, BallLarusNode* currentNode, - BasicBlock* succBB, unsigned duplicateCount) { - BallLarusNode* succNode = inDag[succBB]; - - if(succNode && succNode->getColor() == BallLarusNode::BLACK) { - // visited node and forward edge - addEdge(currentNode, succNode, duplicateCount); - } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) { - // visited node and back edge - DEBUG(dbgs() << "Backedge detected.\n"); - addBackedge(currentNode, succNode, duplicateCount); - } else { - BallLarusNode* childNode; - // not visited node and forward edge - if(succNode) // an unvisited node that is child of a gray node - childNode = succNode; - else { // an unvisited node that is a child of a an unvisted node - childNode = addNode(succBB); - inDag[succBB] = childNode; - } - addEdge(currentNode, childNode, duplicateCount); - dfsStack.push(childNode); - } -} - -// The weight on each edge is the increment required along any path that -// contains that edge. 
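For context on the file being deleted here: calculatePathNumbersFrom() below is the core of Ball-Larus numbering. Each node's path count is the sum of its successors' counts (the exit has exactly one), and each outgoing edge is weighted with the running sum accumulated before visiting that successor, so summing edge weights along any root-to-exit path yields a unique number in [0, NumPaths). A self-contained illustration on a diamond-shaped DAG (plain C++, not patch code):

    #include <cstdio>
    #include <vector>

    int main() {
      // DAG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}; node 3 is the exit.
      std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
      int NumPaths[4] = {0, 0, 0, 1};  // base case: the exit has one path
      for (int V = 2; V >= 0; --V) {   // reverse topological order
        int Sum = 0;
        for (int S : Succ[V]) {
          std::printf("weight(%d->%d) = %d\n", V, S, Sum);
          Sum += NumPaths[S];
        }
        NumPaths[V] = Sum;
      }
      // Path 0-1-3 sums to 0 and path 0-2-3 sums to 1: two paths, two numbers.
      std::printf("paths from root: %d\n", NumPaths[0]);
      return 0;
    }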
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) { - if(node == getExit()) - // The Exit node must be base case - node->setNumberPaths(1); - else { - unsigned sumPaths = 0; - BallLarusNode* succNode; - - for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); - succ != end; succ++) { - if( (*succ)->getType() == BallLarusEdge::BACKEDGE || - (*succ)->getType() == BallLarusEdge::SPLITEDGE ) - continue; - - (*succ)->setWeight(sumPaths); - succNode = (*succ)->getTarget(); - - if( !succNode->getNumberPaths() ) - return; - sumPaths += succNode->getNumberPaths(); - } - - node->setNumberPaths(sumPaths); - } -} - -// Allows subclasses to determine which type of Node is created. -// Override this method to produce subclasses of BallLarusNode if -// necessary. The destructor of BallLarusDag will call free on each -// pointer created. -BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) { - return( new BallLarusNode(BB) ); -} - -// Allows subclasses to determine which type of Edge is created. -// Override this method to produce subclasses of BallLarusEdge if -// necessary. The destructor of BallLarusDag will call free on each -// pointer created. -BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source, - BallLarusNode* target, - unsigned duplicateCount) { - return( new BallLarusEdge(source, target, duplicateCount) ); -} - -// Proxy to node's constructor. Updates the DAG state. -BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) { - BallLarusNode* newNode = createNode(BB); - _nodes.push_back(newNode); - return( newNode ); -} - -// Proxy to edge's constructor. Updates the DAG state. -BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source, - BallLarusNode* target, - unsigned duplicateCount) { - BallLarusEdge* newEdge = createEdge(source, target, duplicateCount); - _edges.push_back(newEdge); - source->addSuccEdge(newEdge); - target->addPredEdge(newEdge); - return(newEdge); -} - -// Adds a backedge with its phony edges. Updates the DAG state. -void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target, - unsigned duplicateCount) { - BallLarusEdge* childEdge = addEdge(source, target, duplicateCount); - childEdge->setType(BallLarusEdge::BACKEDGE); - - childEdge->setPhonyRoot(addEdge(getRoot(), target,0)); - childEdge->setPhonyExit(addEdge(source, getExit(),0)); - - childEdge->getPhonyRoot()->setRealEdge(childEdge); - childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY); - - childEdge->getPhonyExit()->setRealEdge(childEdge); - childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY); - _backEdges.push_back(childEdge); -} diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp deleted file mode 100644 index bc53221..0000000 --- a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp +++ /dev/null @@ -1,433 +0,0 @@ -//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface used by optimizers to load path profiles, -// and provides a loader pass which reads a path profile file. 
-// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "path-profile-info" - -#include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdio> - -using namespace llvm; - -// command line option for loading path profiles -static cl::opt<std::string> -PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden); - -namespace { - class PathProfileLoaderPass : public ModulePass, public PathProfileInfo { - public: - PathProfileLoaderPass() : ModulePass(ID) { } - ~PathProfileLoaderPass(); - - // this pass doesn't change anything (only loads information) - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - - // the full name of the loader pass - virtual const char* getPassName() const { - return "Path Profiling Information Loader"; - } - - // required since this pass implements multiple inheritance - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &PathProfileInfo::ID) - return (PathProfileInfo*)this; - return this; - } - - // entry point to run the pass - bool runOnModule(Module &M); - - // pass identification - static char ID; - - private: - // make a reference table to refer to function by number - void buildFunctionRefs(Module &M); - - // process argument info of a program from the input file - void handleArgumentInfo(); - - // process path number information from the input file - void handlePathInfo(); - - // array of references to the functions in the module - std::vector<Function*> _functions; - - // path profile file handle - FILE* _file; - - // path profile file name - std::string _filename; - }; -} - -// register PathLoader -char PathProfileLoaderPass::ID = 0; - -INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information", - NoPathProfileInfo) -INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo, - "path-profile-loader", - "Load path profile information from file", - false, true, false) - -char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID; - -// link PathLoader as a pass, and make it available as an optimisation -ModulePass *llvm::createPathProfileLoaderPass() { - return new PathProfileLoaderPass; -} - -// ---------------------------------------------------------------------------- -// PathEdge implementation -// -ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target, - unsigned duplicateNumber) - : _source(source), _target(target), _duplicateNumber(duplicateNumber) {} - -// ---------------------------------------------------------------------------- -// Path implementation -// - -ProfilePath::ProfilePath (unsigned int number, unsigned int count, - double countStdDev, PathProfileInfo* ppi) - : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {} - -double ProfilePath::getFrequency() const { - return 100 * double(_count) / - double(_ppi->_functionPathCounts[_ppi->_currentFunction]); -} - -static BallLarusEdge* getNextEdge (BallLarusNode* node, - unsigned int pathNumber) { - BallLarusEdge* best = 0; - - for( BLEdgeIterator next = node->succBegin(), - end = node->succEnd(); next != end; next++ ) { - if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no 
backedges - (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges - (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber - (!best || (best->getWeight() < (*next)->getWeight())) ) // best one? - best = *next; - } - - return best; -} - -ProfilePathEdgeVector* ProfilePath::getPathEdges() const { - BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); - unsigned int increment = _number; - ProfilePathEdgeVector* pev = new ProfilePathEdgeVector; - - while (currentNode != _ppi->_currentDag->getExit()) { - BallLarusEdge* next = getNextEdge(currentNode, increment); - - increment -= next->getWeight(); - - if( next->getType() != BallLarusEdge::BACKEDGE_PHONY && - next->getType() != BallLarusEdge::SPLITEDGE_PHONY && - next->getTarget() != _ppi->_currentDag->getExit() ) - pev->push_back(ProfilePathEdge( - next->getSource()->getBlock(), - next->getTarget()->getBlock(), - next->getDuplicateNumber())); - - if( next->getType() == BallLarusEdge::BACKEDGE_PHONY && - next->getTarget() == _ppi->_currentDag->getExit() ) - pev->push_back(ProfilePathEdge( - next->getRealEdge()->getSource()->getBlock(), - next->getRealEdge()->getTarget()->getBlock(), - next->getDuplicateNumber())); - - if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY && - next->getSource() == _ppi->_currentDag->getRoot() ) - pev->push_back(ProfilePathEdge( - next->getRealEdge()->getSource()->getBlock(), - next->getRealEdge()->getTarget()->getBlock(), - next->getDuplicateNumber())); - - // set the new node - currentNode = next->getTarget(); - } - - return pev; -} - -ProfilePathBlockVector* ProfilePath::getPathBlocks() const { - BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); - unsigned int increment = _number; - ProfilePathBlockVector* pbv = new ProfilePathBlockVector; - - while (currentNode != _ppi->_currentDag->getExit()) { - BallLarusEdge* next = getNextEdge(currentNode, increment); - increment -= next->getWeight(); - - // add block to the block list if it is a real edge - if( next->getType() == BallLarusEdge::NORMAL) - pbv->push_back (currentNode->getBlock()); - // make the back edge the last edge since we are at the end - else if( next->getTarget() == _ppi->_currentDag->getExit() ) { - pbv->push_back (currentNode->getBlock()); - pbv->push_back (next->getRealEdge()->getTarget()->getBlock()); - } - - // set the new node - currentNode = next->getTarget(); - } - - return pbv; -} - -BasicBlock* ProfilePath::getFirstBlockInPath() const { - BallLarusNode* root = _ppi->_currentDag->getRoot(); - BallLarusEdge* edge = getNextEdge(root, _number); - - if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY || - edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) ) - return edge->getTarget()->getBlock(); - - return root->getBlock(); -} - -// ---------------------------------------------------------------------------- -// PathProfileInfo implementation -// - -// Pass identification -char llvm::PathProfileInfo::ID = 0; - -PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) { -} - -PathProfileInfo::~PathProfileInfo() { - if (_currentDag) - delete _currentDag; -} - -// set the function for which paths are currently begin processed -void PathProfileInfo::setCurrentFunction(Function* F) { - // Make sure it exists - if (!F) return; - - if (_currentDag) - delete _currentDag; - - _currentFunction = F; - _currentDag = new BallLarusDag(*F); - _currentDag->init(); - _currentDag->calculatePathNumbers(); -} - -// get the function for which paths are currently being processed -Function* 
PathProfileInfo::getCurrentFunction() const { - return _currentFunction; -} - -// get the entry block of the function -BasicBlock* PathProfileInfo::getCurrentFunctionEntry() { - return _currentDag->getRoot()->getBlock(); -} - -// return the path based on its number -ProfilePath* PathProfileInfo::getPath(unsigned int number) { - return _functionPaths[_currentFunction][number]; -} - -// return the number of paths which a function may potentially execute -unsigned int PathProfileInfo::getPotentialPathCount() { - return _currentDag ? _currentDag->getNumberOfPaths() : 0; -} - -// return an iterator for the beginning of a functions executed paths -ProfilePathIterator PathProfileInfo::pathBegin() { - return _functionPaths[_currentFunction].begin(); -} - -// return an iterator for the end of a functions executed paths -ProfilePathIterator PathProfileInfo::pathEnd() { - return _functionPaths[_currentFunction].end(); -} - -// returns the total number of paths run in the function -unsigned int PathProfileInfo::pathsRun() { - return _currentFunction ? _functionPaths[_currentFunction].size() : 0; -} - -// ---------------------------------------------------------------------------- -// PathLoader implementation -// - -// remove all generated paths -PathProfileLoaderPass::~PathProfileLoaderPass() { - for( FunctionPathIterator funcNext = _functionPaths.begin(), - funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++) - for( ProfilePathIterator pathNext = funcNext->second.begin(), - pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++) - delete pathNext->second; -} - -// entry point of the pass; this loads and parses a file -bool PathProfileLoaderPass::runOnModule(Module &M) { - // get the filename and setup the module's function references - _filename = PathProfileInfoFilename; - buildFunctionRefs (M); - - if (!(_file = fopen(_filename.c_str(), "rb"))) { - errs () << "error: input '" << _filename << "' file does not exist.\n"; - return false; - } - - ProfilingType profType; - - while( fread(&profType, sizeof(ProfilingType), 1, _file) ) { - switch (profType) { - case ArgumentInfo: - handleArgumentInfo (); - break; - case PathInfo: - handlePathInfo (); - break; - default: - errs () << "error: bad path profiling file syntax, " << profType << "\n"; - fclose (_file); - return false; - } - } - - fclose (_file); - - return true; -} - -// create a reference table for functions defined in the path profile file -void PathProfileLoaderPass::buildFunctionRefs (Module &M) { - _functions.push_back(0); // make the 0 index a null pointer - - for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) { - if (F->isDeclaration()) - continue; - _functions.push_back(F); - } -} - -// handle command like argument infor in the output file -void PathProfileLoaderPass::handleArgumentInfo() { - // get the argument list's length - unsigned savedArgsLength; - if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) { - errs() << "warning: argument info header/data mismatch\n"; - return; - } - - // allocate a buffer, and get the arguments - char* args = new char[savedArgsLength+1]; - if( fread(args, 1, savedArgsLength, _file) != savedArgsLength ) - errs() << "warning: argument info header/data mismatch\n"; - - args[savedArgsLength] = '\0'; - argList = std::string(args); - delete [] args; // cleanup dynamic string - - // byte alignment - if (savedArgsLength & 3) - fseek(_file, 4-(savedArgsLength&3), SEEK_CUR); -} - -// Handle path profile information in the output file -void 
PathProfileLoaderPass::handlePathInfo () { - // get the number of functions in this profile - unsigned functionCount; - if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) { - errs() << "warning: path info header/data mismatch\n"; - return; - } - - // gather path information for each function - for (unsigned i = 0; i < functionCount; i++) { - PathProfileHeader pathHeader; - if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) { - errs() << "warning: bad header for path function info\n"; - break; - } - - Function* f = _functions[pathHeader.fnNumber]; - - // dynamically allocate a table to store path numbers - PathProfileTableEntry* pathTable = - new PathProfileTableEntry[pathHeader.numEntries]; - - if( fread(pathTable, sizeof(PathProfileTableEntry), - pathHeader.numEntries, _file) != pathHeader.numEntries) { - delete [] pathTable; - errs() << "warning: path function info header/data mismatch\n"; - return; - } - - // Build a new path for the current function - unsigned int totalPaths = 0; - for (unsigned int j = 0; j < pathHeader.numEntries; j++) { - totalPaths += pathTable[j].pathCounter; - _functionPaths[f][pathTable[j].pathNumber] - = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter, - 0, this); - } - - _functionPathCounts[f] = totalPaths; - - delete [] pathTable; - } -} - -//===----------------------------------------------------------------------===// -// NoProfile PathProfileInfo implementation -// - -namespace { - struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo { - static char ID; // Class identification, replacement for typeinfo - NoPathProfileInfo() : ImmutablePass(ID) { - initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry()); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &PathProfileInfo::ID) - return (PathProfileInfo*)this; - return this; - } - - virtual const char *getPassName() const { - return "NoPathProfileInfo"; - } - }; -} // End of anonymous namespace - -char NoPathProfileInfo::ID = 0; -// Register this pass... -INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile", - "No Path Profile Information", false, true, true) - -ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp deleted file mode 100644 index 48d7d05..0000000 --- a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This verifier derives an edge profile file from current path profile -// information -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "path-profile-verifier" - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <stdio.h> - -using namespace llvm; - -namespace { - class PathProfileVerifier : public ModulePass { - private: - bool runOnModule(Module &M); - - public: - static char ID; // Pass identification, replacement for typeid - PathProfileVerifier() : ModulePass(ID) { - initializePathProfileVerifierPass(*PassRegistry::getPassRegistry()); - } - - - virtual const char *getPassName() const { - return "Path Profiler Verifier"; - } - - // The verifier requires the path profile and edge profile. - virtual void getAnalysisUsage(AnalysisUsage& AU) const; - }; -} - -static cl::opt<std::string> -EdgeProfileFilename("path-profile-verifier-file", - cl::init("edgefrompath.llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Edge profile file generated by -path-profile-verifier"), - cl::Hidden); - -char PathProfileVerifier::ID = 0; -INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier", - "Compare the path profile derived edge profile against the " - "edge profile.", true, true) - -ModulePass *llvm::createPathProfileVerifierPass() { - return new PathProfileVerifier(); -} - -// The verifier requires the path profile and edge profile. -void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired<PathProfileInfo>(); - AU.addPreserved<PathProfileInfo>(); -} - -typedef std::map<unsigned, unsigned> DuplicateToIndexMap; -typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap; -typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap; - -// the verifier iterates through each path to gather the total -// number of edge frequencies -bool PathProfileVerifier::runOnModule (Module &M) { - PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>(); - - // setup a data structure to map path edges which index an - // array of edge counters - NestedBlockToIndexMap arrayMap; - unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - - arrayMap[(BasicBlock*)0][F->begin()][0] = i++; - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - - unsigned duplicate = 0; - BasicBlock* prev = 0; - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; - prev = TI->getSuccessor(s), ++s) { - if (prev == TI->getSuccessor(s)) - duplicate++; - else duplicate = 0; - - arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++; - } - } - } - - std::vector<unsigned> edgeArray(i); - - // iterate through each path and increment the edge counters as needed - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - - pathProfileInfo.setCurrentFunction(F); - - DEBUG(dbgs() << "function '" << F->getName() << "' ran " - << pathProfileInfo.pathsRun() - << "/" << pathProfileInfo.getPotentialPathCount() - << " potential paths\n"); - - for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(), - endPath = 
pathProfileInfo.pathEnd(); - nextPath != endPath; nextPath++ ) { - ProfilePath* currentPath = nextPath->second; - - ProfilePathEdgeVector* pev = currentPath->getPathEdges(); - DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " - << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doesn't care about this) - if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] - += currentPath->getCount(); - - for( ProfilePathEdgeIterator nextEdge = pev->begin(), - endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) { - if (nextEdge != pev->begin()) - DEBUG(dbgs() << " :: "); - - BasicBlock* source = nextEdge->getSource(); - BasicBlock* target = nextEdge->getTarget(); - unsigned duplicateNumber = nextEdge->getDuplicateNumber(); - DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber - << "}--> " << target->getName()); - - // Ensure all the referenced edges exist - // TODO: make this a separate function - if( !arrayMap.count(source) ) { - errs() << " error [" << F->getName() << "()]: source '" - << source->getName() - << "' does not exist in the array map.\n"; - } else if( !arrayMap[source].count(target) ) { - errs() << " error [" << F->getName() << "()]: target '" - << target->getName() - << "' does not exist in the array map.\n"; - } else if( !arrayMap[source][target].count(duplicateNumber) ) { - errs() << " error [" << F->getName() << "()]: edge " - << source->getName() << " -> " << target->getName() - << " duplicate number " << duplicateNumber - << " does not exist in the array map.\n"; - } else { - edgeArray[arrayMap[source][target][duplicateNumber]] - += currentPath->getCount(); - } - } - - DEBUG(errs() << "\n"); - - delete pev; - } - } - - std::string errorInfo; - std::string filename = EdgeProfileFilename; - - // Open a handle to the file - FILE* edgeFile = fopen(filename.c_str(),"wb"); - - if (!edgeFile) { - errs() << "error: unable to open file '" << filename << "' for output.\n"; - return false; - } - - errs() << "Generating edge profile '" << filename << "' ...\n"; - - // write argument info - unsigned type = ArgumentInfo; - unsigned num = pathProfileInfo.argList.size(); - int zeros = 0; - - fwrite(&type,sizeof(unsigned),1,edgeFile); - fwrite(&num,sizeof(unsigned),1,edgeFile); - fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile); - if (num&3) - fwrite(&zeros, 1, 4-(num&3), edgeFile); - - type = EdgeInfo; - num = edgeArray.size(); - fwrite(&type,sizeof(unsigned),1,edgeFile); - fwrite(&num,sizeof(unsigned),1,edgeFile); - - // write each edge to the file - for( std::vector<unsigned>::iterator s = edgeArray.begin(), - e = edgeArray.end(); s != e; s++) - fwrite(&*s, sizeof (unsigned), 1, edgeFile); - - fclose (edgeFile); - - return true; -} diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp deleted file mode 100644 index d7f444b..0000000 --- a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The ProfileDataLoader class is used to load raw profiling data from the dump -// file. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Analysis/ProfileDataTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include <cstdio> -#include <cstdlib> -using namespace llvm; - -raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *, - const BasicBlock *> E) { - O << "("; - - if (E.first) - O << E.first->getName(); - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second->getName(); - else - O << "0"; - - return O << ")"; -} - -/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one -/// (or both) may not be defined. -static unsigned AddCounts(unsigned A, unsigned B) { - // If either value is undefined, use the other. - // Undefined + undefined = undefined. - if (A == ProfileDataLoader::Uncounted) return B; - if (B == ProfileDataLoader::Uncounted) return A; - - return A + B; -} - -/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' -template <typename T> -static void ReadProfilingData(const char *ToolName, FILE *F, - T *Data, size_t NumEntries) { - // Read in the block of data... - if (fread(Data, sizeof(T), NumEntries, F) != NumEntries) - report_fatal_error(Twine(ToolName) + ": Profiling data truncated"); -} - -/// ReadProfilingNumEntries - Read how many entries are in this profiling data -/// packet. -static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F, - bool ShouldByteSwap) { - unsigned Entry; - ReadProfilingData<unsigned>(ToolName, F, &Entry, 1); - return ShouldByteSwap ? ByteSwap_32(Entry) : Entry; -} - -/// ReadProfilingBlock - Read the number of entries in the next profiling data -/// packet and then accumulate the entries into 'Data'. -static void ReadProfilingBlock(const char *ToolName, FILE *F, - bool ShouldByteSwap, - SmallVector<unsigned, 32> &Data) { - // Read the number of entries... - unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); - - // Read in the data. - SmallVector<unsigned, 8> TempSpace(NumEntries); - ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries); - - // Make sure we have enough space ... - if (Data.size() < NumEntries) - Data.resize(NumEntries, ProfileDataLoader::Uncounted); - - // Accumulate the data we just read into the existing data. - for (unsigned i = 0; i < NumEntries; ++i) { - unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i]; - Data[i] = AddCounts(Entry, Data[i]); - } -} - -/// ReadProfilingArgBlock - Read the command line arguments that the progam was -/// run with when the current profiling data packet(s) were generated. -static void ReadProfilingArgBlock(const char *ToolName, FILE *F, - bool ShouldByteSwap, - SmallVector<std::string, 1> &CommandLines) { - // Read the number of bytes ... - unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); - - // Read in the arguments (if there are any to read). Round up the length to - // the nearest 4-byte multiple. - SmallVector<char, 8> Args(ArgLength+4); - if (ArgLength) - ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3); - - // Store the arguments. 
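  // (Editor's aside, illustration only: the reader consumes the same 4-byte
  // padding the writer emitted, and (n + 3) & ~3 rounds n up to the next
  // multiple of four; for example (10 + 3) & ~3 == 12, while
  // (12 + 3) & ~3 == 12.)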
- CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); -} - -const unsigned ProfileDataLoader::Uncounted = ~0U; - -/// ProfileDataLoader ctor - Read the specified profiling data file, reporting -/// a fatal error if the file is invalid or broken. -ProfileDataLoader::ProfileDataLoader(const char *ToolName, - const std::string &Filename) - : Filename(Filename) { - FILE *F = fopen(Filename.c_str(), "rb"); - if (F == 0) - report_fatal_error(Twine(ToolName) + ": Error opening '" + - Filename + "': "); - - // Keep reading packets until we run out of them. - unsigned PacketType; - while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { - // If the low eight bits of the packet are zero, we must be dealing with an - // endianness mismatch. Byteswap all words read from the profiling - // information. This can happen when the compiler host and target have - // different endianness. - bool ShouldByteSwap = (char)PacketType == 0; - PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType; - - switch (PacketType) { - case ArgumentInfo: - ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); - break; - - case EdgeInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); - break; - - default: - report_fatal_error(std::string(ToolName) - + ": Unknown profiling packet type"); - break; - } - } - - fclose(F); -} diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp deleted file mode 100644 index 2ee0093..0000000 --- a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp +++ /dev/null @@ -1,188 +0,0 @@ -//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass loads profiling data from a dump file and sets branch weight -// metadata. -// -// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" -// once ProfileInfo etc. has been removed. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-metadata-loader" -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumEdgesRead, "The # of edges read."); -STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); - -static cl::opt<std::string> -ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Profile file loaded by -profile-metadata-loader")); - -namespace { - /// This pass loads profiling data from a dump file and sets branch weight - /// metadata. 
- class ProfileMetadataLoaderPass : public ModulePass { - std::string Filename; - public: - static char ID; // Class identification, replacement for typeinfo - explicit ProfileMetadataLoaderPass(const std::string &filename = "") - : ModulePass(ID), Filename(filename) { - initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry()); - if (filename.empty()) Filename = ProfileMetadataFilename; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - - virtual const char *getPassName() const { - return "Profile loader"; - } - - virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge, - ArrayRef<unsigned>); - virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>); - virtual void setBranchWeightMetadata(Module&, ProfileData&); - - virtual bool runOnModule(Module &M); - }; -} // End of anonymous namespace - -char ProfileMetadataLoaderPass::ID = 0; -INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader", - "Load profile information from llvmprof.out", false, true) -INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader", - "Load profile information from llvmprof.out", false, true) - -char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID; - -/// createProfileMetadataLoaderPass - This function returns a Pass that loads -/// the profiling information for the module from the specified filename, -/// making it available to the optimizers. -ModulePass *llvm::createProfileMetadataLoaderPass() { - return new ProfileMetadataLoaderPass(); -} -ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) { - return new ProfileMetadataLoaderPass(Filename); -} - -/// readEdge - Take the value from a profile counter and assign it to an edge. -void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount, - ProfileData &PB, ProfileData::Edge e, - ArrayRef<unsigned> Counters) { - if (ReadCount >= Counters.size()) return; - - unsigned weight = Counters[ReadCount]; - assert(weight != ProfileDataLoader::Uncounted); - PB.addEdgeWeight(e, weight); - - DEBUG(dbgs() << "-- Read Edge Counter for " << e - << " (# "<< (ReadCount) << "): " - << PB.getEdgeWeight(e) << "\n"); -} - -/// matchEdges - Link every profile counter with an edge. -unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB, - ArrayRef<unsigned> Counters) { - if (Counters.size() == 0) return 0; - - unsigned ReadCount = 0; - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n"); - readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)), - Counters); - } - } - } - - return ReadCount; -} - -/// setBranchWeightMetadata - Translate the counter values associated with each -/// edge into branch weights for each conditional branch (a branch with 2 or -/// more desinations). 
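// (Editor's illustration, not part of the original pass: once branch weights
// are attached, a two-way branch observed 90:10 carries IR along the lines of
//    br i1 %cond, label %then, label %else, !prof !0
//    !0 = metadata !{metadata !"branch_weights", i32 90, i32 10}
// which BranchProbabilityInfo can later consume; the exact metadata syntax
// depends on the LLVM version.)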
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, - ProfileData &PB) { - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - unsigned NumSuccessors = TI->getNumSuccessors(); - - // If there is only one successor then we can not set a branch - // probability as the target is certain. - if (NumSuccessors < 2) continue; - - // Load the weights of all edges leading from this terminator. - DEBUG(dbgs() << "-- Terminator with " << NumSuccessors - << " successors:\n"); - SmallVector<uint32_t, 4> Weights(NumSuccessors); - for (unsigned s = 0 ; s < NumSuccessors ; ++s) { - ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); - Weights[s] = (uint32_t)PB.getEdgeWeight(edge); - DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " - << Weights[s] << "\n"); - } - - // Set branch weight metadata. This will set branch probabilities of - // 100%/0% if that is true of the dynamic execution. - // BranchProbabilityInfo can account for this when it loads this metadata - // (it gives the unexectuted branch a weight of 1 for the purposes of - // probability calculations). - MDBuilder MDB(TI->getContext()); - MDNode *Node = MDB.createBranchWeights(Weights); - TI->setMetadata(LLVMContext::MD_prof, Node); - NumTermsAnnotated++; - } - } -} - -bool ProfileMetadataLoaderPass::runOnModule(Module &M) { - ProfileDataLoader PDL("profile-data-loader", Filename); - ProfileData PB; - - ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts(); - - unsigned ReadCount = matchEdges(M, PB, Counters); - - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - - setBranchWeightMetadata(M, PB); - - return ReadCount > 0; -} diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp deleted file mode 100644 index b284b99..0000000 --- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp +++ /dev/null @@ -1,426 +0,0 @@ -//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a concrete implementation of profiling information that -// estimates the profiling information in a very crude and unimaginative way. 
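//
// (Editor's summary, inferred from the code below: the estimator seeds each
// function entry with a large weight (2^32), assumes every loop body runs
// -profile-estimator-loop-weight times (10 by default), and then propagates
// weights so that the flow condition holds at every block.)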
-// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-estimator" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt<double> -LoopWeight( - "profile-estimator-loop-weight", cl::init(10), - cl::value_desc("loop-weight"), - cl::desc("Number of loop executions used for profile-estimator") -); - -namespace { - class ProfileEstimatorPass : public FunctionPass, public ProfileInfo { - double ExecCount; - LoopInfo *LI; - std::set<BasicBlock*> BBToVisit; - std::map<Loop*,double> LoopExitWeights; - std::map<Edge,double> MinimalWeight; - public: - static char ID; // Class identification, replacement for typeinfo - explicit ProfileEstimatorPass(const double execcount = 0) - : FunctionPass(ID), ExecCount(execcount) { - initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry()); - if (execcount == 0) ExecCount = LoopWeight; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<LoopInfo>(); - } - - virtual const char *getPassName() const { - return "Profiling information estimator"; - } - - /// run - Estimate the profile information from the specified file. - virtual bool runOnFunction(Function &F); - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &ProfileInfo::ID) - return (ProfileInfo*)this; - return this; - } - - virtual void recurseBasicBlock(BasicBlock *BB); - - void inline printEdgeWeight(Edge); - }; -} // End of anonymous namespace - -char ProfileEstimatorPass::ID = 0; -INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator", - "Estimate profiling information", false, true, false) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator", - "Estimate profiling information", false, true, false) - -namespace llvm { - char &ProfileEstimatorPassID = ProfileEstimatorPass::ID; - - FunctionPass *createProfileEstimatorPass() { - return new ProfileEstimatorPass(); - } - - /// createProfileEstimatorPass - This function returns a Pass that estimates - /// profiling information using the given loop execution count. - Pass *createProfileEstimatorPass(const unsigned execcount) { - return new ProfileEstimatorPass(execcount); - } -} - -static double ignoreMissing(double w) { - if (w == ProfileInfo::MissingValue) return 0; - return w; -} - -static void inline printEdgeError(ProfileInfo::Edge e, const char *M) { - DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n"); -} - -void inline ProfileEstimatorPass::printEdgeWeight(Edge E) { - DEBUG(dbgs() << "-- Weight of Edge " << E << ":" - << format("%20.20g", getEdgeWeight(E)) << "\n"); -} - -// recurseBasicBlock() - This calculates the ProfileInfo estimation for a -// single block and then recurses into the successors. 
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weights of the incoming edges must equal the block weight, which must in
-// turn equal the sum of the weights of the outgoing edges.
-// Since the flow of a block is determined from the current state of the
-// flow, once an edge has a flow assigned this flow is never changed again,
-// otherwise it would be possible to violate the flow condition in another
-// block.
-void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
-
-  // Break the recursion if this BasicBlock was already visited.
-  if (BBToVisit.find(BB) == BBToVisit.end()) return;
-
-  // Read the LoopInfo for this block.
-  bool BBisHeader = LI->isLoopHeader(BB);
-  Loop* BBLoop = LI->getLoopFor(BB);
-
-  // To get the block weight, read all incoming edges.
-  double BBWeight = 0;
-  std::set<BasicBlock*> ProcessedPreds;
-  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-        bbi != bbe; ++bbi ) {
-    // If this block was not considered already, add weight.
-    Edge edge = getEdge(*bbi,BB);
-    double w = getEdgeWeight(edge);
-    if (ProcessedPreds.insert(*bbi).second) {
-      BBWeight += ignoreMissing(w);
-    }
-    // If this block is a loop header and the predecessor is contained in this
-    // loop, the edge is a backedge; continue and do not check if the
-    // value is valid.
-    if (BBisHeader && BBLoop->contains(*bbi)) {
-      printEdgeError(edge, "but is backedge, continuing");
-      continue;
-    }
-    // If the edge's value is missing (and this is no loop header, and this is
-    // no backedge) return; this block is currently not estimable.
-    if (w == MissingValue) {
-      printEdgeError(edge, "returning");
-      return;
-    }
-  }
-  if (getExecutionCount(BB) != MissingValue) {
-    BBWeight = getExecutionCount(BB);
-  }
-
-  // Fetch all necessary information for current block.
-  SmallVector<Edge, 8> ExitEdges;
-  SmallVector<Edge, 8> Edges;
-  if (BBLoop) {
-    BBLoop->getExitEdges(ExitEdges);
-  }
-
-  // If this is a loop header, consider the following:
-  // Exactly the flow that is entering this block must exit this block too. So
-  // do the following:
-  // *) get all the exit edges, read the flow that is already leaving this
-  // loop, remember the edges that do not have any flow on them right now.
-  // (The edges that already have flow on them are most likely exiting edges of
-  // other loops; do not touch those flows because the previously calculated
-  // loop headers would not be exact anymore.)
-  // *) In case there is not a single exiting edge left, create one at the loop
-  // latch to prevent the flow from building up in the loop.
-  // *) Take the flow that is not leaving the loop already and distribute it on
-  // the remaining exiting edges.
-  // (This ensures that all flow that enters the loop also leaves it.)
-  // *) Increase the flow into the loop by increasing the weight of this block.
-  // There is at least one incoming backedge that will bring us this flow later
-  // on. (So that the flow condition in this node is valid again.)
-  if (BBisHeader) {
-    double incoming = BBWeight;
-    // Subtract the flow leaving the loop.
-    std::set<Edge> ProcessedExits;
-    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
-         ee = ExitEdges.end(); ei != ee; ++ei) {
-      if (ProcessedExits.insert(*ei).second) {
-        double w = getEdgeWeight(*ei);
-        if (w == MissingValue) {
-          Edges.push_back(*ei);
-          // Check if there is a necessary minimal weight; if yes, subtract it
-          // from weight.
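          // (Editor's aside, a worked example of this reservation: if 110
          // units enter the header and one exit edge already carries 10,
          // 'incoming' drops to 100; weight reserved here for a still-empty
          // exit edge is re-added to that same edge during the distribution
          // below.)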
-          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
-            incoming -= MinimalWeight[*ei];
-            DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
-          }
-        } else {
-          incoming -= w;
-        }
-      }
-    }
-    // If no exit edges, create one:
-    if (Edges.size() == 0) {
-      BasicBlock *Latch = BBLoop->getLoopLatch();
-      if (Latch) {
-        Edge edge = getEdge(Latch,0);
-        EdgeInformation[BB->getParent()][edge] = BBWeight;
-        printEdgeWeight(edge);
-        edge = getEdge(Latch, BB);
-        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
-        printEdgeWeight(edge);
-      }
-    }
-
-    // Distribute the remaining weight to the exiting edges. To prevent
-    // fractions from building up and provoking precision problems, the weight
-    // to be distributed is split and rounded; the last edge gets a somewhat
-    // bigger value, but we are close enough for an estimation.
-    double fraction = floor(incoming/Edges.size());
-    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
-         ei != ee; ++ei) {
-      double w = 0;
-      if (ei != (ee-1)) {
-        w = fraction;
-        incoming -= fraction;
-      } else {
-        w = incoming;
-      }
-      EdgeInformation[BB->getParent()][*ei] += w;
-      // Read necessary minimal weight.
-      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
-        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
-        DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
-      }
-      printEdgeWeight(*ei);
-
-      // Add minimal weight to the paths to all exit edges; this is used to
-      // ensure that enough flow reaches these edges.
-      Path p;
-      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
-      while (Dest != BB) {
-        const BasicBlock *Parent = p.find(Dest)->second;
-        Edge e = getEdge(Parent, Dest);
-        if (MinimalWeight.find(e) == MinimalWeight.end()) {
-          MinimalWeight[e] = 0;
-        }
-        MinimalWeight[e] += w;
-        DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
-        Dest = Parent;
-      }
-    }
-    // Increase flow into the loop.
-    BBWeight *= (ExecCount+1);
-  }
-
-  BlockInformation[BB->getParent()][BB] = BBWeight;
-  // Up until now we considered only the loop-exiting edges; now we have a
-  // definite block weight and must distribute it onto the outgoing edges.
-  // Since there may already be flow attached to some of the edges, read this
-  // flow first and remember the edges that still have no flow attached.
-  Edges.clear();
-  std::set<BasicBlock*> ProcessedSuccs;
-
-  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-  // Also check for (BB,0) edges that may already contain some flow. (But only
-  // in case there are no successors.)
-  if (bbi == bbe) {
-    Edge edge = getEdge(BB,0);
-    EdgeInformation[BB->getParent()][edge] = BBWeight;
-    printEdgeWeight(edge);
-  }
-  for ( ; bbi != bbe; ++bbi ) {
-    if (ProcessedSuccs.insert(*bbi).second) {
-      Edge edge = getEdge(BB,*bbi);
-      double w = getEdgeWeight(edge);
-      if (w != MissingValue) {
-        BBWeight -= getEdgeWeight(edge);
-      } else {
-        Edges.push_back(edge);
-        // If a minimal weight is necessary, reserve it by subtracting it
-        // from the block weight; it is re-added later on.
-        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
-          BBWeight -= MinimalWeight[edge];
-          DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
-        }
-      }
-    }
-  }
-
-  double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
-  // Finally we know what flow is still not leaving the block; distribute this
-  // flow onto the empty edges.
-  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
-       ei != ee; ++ei) {
-    if (ei != (ee-1)) {
-      EdgeInformation[BB->getParent()][*ei] += fraction;
-      BBWeight -= fraction;
-    } else {
-      EdgeInformation[BB->getParent()][*ei] += BBWeight;
-    }
-    // Re-add the minimal necessary weight.
-    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
-      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
-      DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
-    }
-    printEdgeWeight(*ei);
-  }
-
-  // This block is visited; mark this before the recursion.
-  BBToVisit.erase(BB);
-
-  // Recurse into successors.
-  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-       bbi != bbe; ++bbi) {
-    recurseBasicBlock(*bbi);
-  }
-}
-
-bool ProfileEstimatorPass::runOnFunction(Function &F) {
-  if (F.isDeclaration()) return false;
-
-  // Fetch LoopInfo and clear ProfileInfo for this function.
-  LI = &getAnalysis<LoopInfo>();
-  FunctionInformation.erase(&F);
-  BlockInformation[&F].clear();
-  EdgeInformation[&F].clear();
-  BBToVisit.clear();
-
-  // Mark all blocks as to visit.
-  for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
-    BBToVisit.insert(bi);
-
-  // Clear Minimal Edges.
-  MinimalWeight.clear();
-
-  DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
-
-  // Since the entry block is the first one and has no predecessors, the edge
-  // (0,entry) is inserted with the starting weight of 2^32.
-  BasicBlock *entry = &F.getEntryBlock();
-  BlockInformation[&F][entry] = pow(2.0, 32.0);
-  Edge edge = getEdge(0,entry);
-  EdgeInformation[&F][edge] = BlockInformation[&F][entry];
-  printEdgeWeight(edge);
-
-  // Since recurseBasicBlock() may return with a block which was not fully
-  // estimated, use recurseBasicBlock() until everything is calculated.
-  bool cleanup = false;
-  recurseBasicBlock(entry);
-  while (BBToVisit.size() > 0 && !cleanup) {
-    // Remember the number of open blocks; this is later used to check if
-    // progress was made.
-    unsigned size = BBToVisit.size();
-
-    // Try to calculate all blocks in turn.
-    for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
-         be = BBToVisit.end(); bi != be; ++bi) {
-      recurseBasicBlock(*bi);
-      // If at least one block was finished, break because the iterator may be
-      // invalid.
-      if (BBToVisit.size() < size) break;
-    }
-
-    // If not a single block was resolved, make some assumptions.
-    if (BBToVisit.size() == size) {
-      bool found = false;
-      for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
-           (BBI != BBE) && (!found); ++BBI) {
-        BasicBlock *BB = *BBI;
-        // Try each predecessor if it can be assumed.
-        for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-             (bbi != bbe) && (!found); ++bbi) {
-          Edge e = getEdge(*bbi,BB);
-          double w = getEdgeWeight(e);
-          // Check that the edge from the predecessor is still free.
-          if (w == MissingValue) {
-            // Check if there is a circle from this block to the predecessor.
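            // (Editor's aside: GetPath performs a BFS starting at BB, so if
            // it cannot reach the predecessor again, the missing edge lies on
            // no cycle through BB, and assuming a weight of 0 for it cannot
            // starve a loop of its flow.)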
- Path P; - const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest); - if (Dest != *bbi) { - // If there is no circle, just set edge weight to 0 - EdgeInformation[&F][e] = 0; - DEBUG(dbgs() << "Assuming edge weight: "); - printEdgeWeight(e); - found = true; - } - } - } - } - if (!found) { - cleanup = true; - DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n"); - } - } - } - // In case there was no safe way to assume edges, set as a last measure, - // set _everything_ to zero. - if (cleanup) { - FunctionInformation[&F] = 0; - BlockInformation[&F].clear(); - EdgeInformation[&F].clear(); - for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - const BasicBlock *BB = &(*FI); - BlockInformation[&F][BB] = 0; - const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB); - if (predi == prede) { - Edge e = getEdge(0,BB); - setEdgeWeight(e,0); - } - for (;predi != prede; ++predi) { - Edge e = getEdge(*predi,BB); - setEdgeWeight(e,0); - } - succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB); - if (succi == succe) { - Edge e = getEdge(BB,0); - setEdgeWeight(e,0); - } - for (;succi != succe; ++succi) { - Edge e = getEdge(*succi,BB); - setEdgeWeight(e,0); - } - } - } - - return false; -} diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp deleted file mode 100644 index 9626a48..0000000 --- a/contrib/llvm/lib/Analysis/ProfileInfo.cpp +++ /dev/null @@ -1,1079 +0,0 @@ -//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the abstract ProfileInfo interface, and the default -// "no profile" implementation. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-info" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include <limits> -#include <queue> -#include <set> -using namespace llvm; - -namespace llvm { - template<> char ProfileInfoT<Function,BasicBlock>::ID = 0; -} - -// Register the ProfileInfo interface, providing a nice name to refer to. 
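// (Editor's note: this macro registers ProfileInfo as an analysis *group*;
// concrete providers such as the loader and estimator passes join it via
// INITIALIZE_AG_PASS, and a consumer of getAnalysis<ProfileInfo>() receives
// whichever member is scheduled, falling back to the NoProfileInfo default
// defined at the end of this file.)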
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo) - -namespace llvm { - -template <> -ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {} -template <> -ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {} - -template <> -ProfileInfoT<Function, BasicBlock>::ProfileInfoT() { - MachineProfile = 0; -} -template <> -ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { - if (MachineProfile) delete MachineProfile; -} - -template<> -char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; - -template<> -const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; - -template<> const -double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; - -template<> double -ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { - std::map<const Function*, BlockCounts>::iterator J = - BlockInformation.find(BB->getParent()); - if (J != BlockInformation.end()) { - BlockCounts::iterator I = J->second.find(BB); - if (I != J->second.end()) - return I->second; - } - - double Count = MissingValue; - - const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - - // Are there zero predecessors of this block? - if (PI == PE) { - Edge e = getEdge(0, BB); - Count = getEdgeWeight(e); - } else { - // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. - std::set<const BasicBlock*> ProcessedPreds; - Count = 0; - for (; PI != PE; ++PI) { - const BasicBlock *P = *PI; - if (ProcessedPreds.insert(P).second) { - double w = getEdgeWeight(getEdge(P, BB)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; - } - } - } - - // If the predecessors did not suffice to get block weight, try successors. - if (Count == MissingValue) { - - succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); - - // Are there zero successors of this block? - if (SI == SE) { - Edge e = getEdge(BB,0); - Count = getEdgeWeight(e); - } else { - std::set<const BasicBlock*> ProcessedSuccs; - Count = 0; - for (; SI != SE; ++SI) - if (ProcessedSuccs.insert(*SI).second) { - double w = getEdgeWeight(getEdge(BB, *SI)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; - } - } - } - - if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; - return Count; -} - -template<> -double ProfileInfoT<MachineFunction, MachineBasicBlock>:: - getExecutionCount(const MachineBasicBlock *MBB) { - std::map<const MachineFunction*, BlockCounts>::iterator J = - BlockInformation.find(MBB->getParent()); - if (J != BlockInformation.end()) { - BlockCounts::iterator I = J->second.find(MBB); - if (I != J->second.end()) - return I->second; - } - - return MissingValue; -} - -template<> -double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) { - std::map<const Function*, double>::iterator J = - FunctionInformation.find(F); - if (J != FunctionInformation.end()) - return J->second; - - // isDeclaration() is checked here and not at start of function to allow - // functions without a body still to have a execution count. 
- if (F->isDeclaration()) return MissingValue; - - double Count = getExecutionCount(&F->getEntryBlock()); - if (Count != MissingValue) FunctionInformation[F] = Count; - return Count; -} - -template<> -double ProfileInfoT<MachineFunction, MachineBasicBlock>:: - getExecutionCount(const MachineFunction *MF) { - std::map<const MachineFunction*, double>::iterator J = - FunctionInformation.find(MF); - if (J != FunctionInformation.end()) - return J->second; - - double Count = getExecutionCount(&MF->front()); - if (Count != MissingValue) FunctionInformation[MF] = Count; - return Count; -} - -template<> -void ProfileInfoT<Function,BasicBlock>:: - setExecutionCount(const BasicBlock *BB, double w) { - DEBUG(dbgs() << "Creating Block " << BB->getName() - << " (weight: " << format("%.20g",w) << ")\n"); - BlockInformation[BB->getParent()][BB] = w; -} - -template<> -void ProfileInfoT<MachineFunction, MachineBasicBlock>:: - setExecutionCount(const MachineBasicBlock *MBB, double w) { - DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName() - << " (weight: " << format("%.20g",w) << ")\n"); - BlockInformation[MBB->getParent()][MBB] = w; -} - -template<> -void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) { - double oldw = getEdgeWeight(e); - assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); - DEBUG(dbgs() << "Adding to Edge " << e - << " (new weight: " << format("%.20g",oldw + w) << ")\n"); - EdgeInformation[getFunction(e)][e] = oldw + w; -} - -template<> -void ProfileInfoT<Function,BasicBlock>:: - addExecutionCount(const BasicBlock *BB, double w) { - double oldw = getExecutionCount(BB); - assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); - DEBUG(dbgs() << "Adding to Block " << BB->getName() - << " (new weight: " << format("%.20g",oldw + w) << ")\n"); - BlockInformation[BB->getParent()][BB] = oldw + w; -} - -template<> -void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) { - std::map<const Function*, BlockCounts>::iterator J = - BlockInformation.find(BB->getParent()); - if (J == BlockInformation.end()) return; - - DEBUG(dbgs() << "Deleting " << BB->getName() << "\n"); - J->second.erase(BB); -} - -template<> -void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) { - std::map<const Function*, EdgeWeights>::iterator J = - EdgeInformation.find(getFunction(e)); - if (J == EdgeInformation.end()) return; - - DEBUG(dbgs() << "Deleting" << e << "\n"); - J->second.erase(e); -} - -template<> -void ProfileInfoT<Function,BasicBlock>:: - replaceEdge(const Edge &oldedge, const Edge &newedge) { - double w; - if ((w = getEdgeWeight(newedge)) == MissingValue) { - w = getEdgeWeight(oldedge); - DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n"); - } else { - w += getEdgeWeight(oldedge); - DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n"); - } - setEdgeWeight(newedge,w); - removeEdge(oldedge); -} - -template<> -const BasicBlock *ProfileInfoT<Function,BasicBlock>:: - GetPath(const BasicBlock *Src, const BasicBlock *Dest, - Path &P, unsigned Mode) { - const BasicBlock *BB = 0; - bool hasFoundPath = false; - - std::queue<const BasicBlock *> BFS; - BFS.push(Src); - - while(BFS.size() && !hasFoundPath) { - BB = BFS.front(); - BFS.pop(); - - succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); - if (Succ == End) { - P[(const BasicBlock*)0] = BB; - if (Mode & GetPathToExit) { - hasFoundPath = true; - BB = 0; - } - } - for(;Succ != End; ++Succ) { - if 
(P.find(*Succ) != P.end()) continue;
-      Edge e = getEdge(BB,*Succ);
-      if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
-      P[*Succ] = BB;
-      BFS.push(*Succ);
-      if ((Mode & GetPathToDest) && *Succ == Dest) {
-        hasFoundPath = true;
-        BB = *Succ;
-        break;
-      }
-      if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
-        hasFoundPath = true;
-        BB = *Succ;
-        break;
-      }
-    }
-  }
-
-  return BB;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
-  divertFlow(const Edge &oldedge, const Edge &newedge) {
-  DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
-  // First check if the old edge was taken; if not, just delete it...
-  if (getEdgeWeight(oldedge) == 0) {
-    removeEdge(oldedge);
-    return;
-  }
-
-  Path P;
-  P[newedge.first] = 0;
-  P[newedge.second] = newedge.first;
-  const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
-  double w = getEdgeWeight (oldedge);
-  DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
-  do {
-    const BasicBlock *Parent = P.find(BB)->second;
-    Edge e = getEdge(Parent,BB);
-    double oldw = getEdgeWeight(e);
-    double oldc = getExecutionCount(e.first);
-    setEdgeWeight(e, w+oldw);
-    if (Parent != oldedge.first) {
-      setExecutionCount(e.first, w+oldc);
-    }
-    BB = Parent;
-  } while (BB != newedge.first);
-  removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
-  replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
-  DEBUG(dbgs() << "Replacing " << RmBB->getName()
-        << " with " << DestBB->getName() << "\n");
-  const Function *F = DestBB->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  Edge e, newedge;
-  bool erasededge = false;
-  EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
-  while(I != E) {
-    e = (I++)->first;
-    bool foundedge = false; bool eraseedge = false;
-    if (e.first == RmBB) {
-      if (e.second == DestBB) {
-        eraseedge = true;
-      } else {
-        newedge = getEdge(DestBB, e.second);
-        foundedge = true;
-      }
-    }
-    if (e.second == RmBB) {
-      if (e.first == DestBB) {
-        eraseedge = true;
-      } else {
-        newedge = getEdge(e.first, DestBB);
-        foundedge = true;
-      }
-    }
-    if (foundedge) {
-      replaceEdge(e, newedge);
-    }
-    if (eraseedge) {
-      if (erasededge) {
-        Edge newedge = getEdge(DestBB, DestBB);
-        replaceEdge(e, newedge);
-      } else {
-        removeEdge(e);
-        erasededge = true;
-      }
-    }
-  }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it's possible that there is more than one edge in the CFG from
-/// FirstBB to SecondBB, it's necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
-                                                  const BasicBlock *SecondBB,
-                                                  const BasicBlock *NewBB,
-                                                  bool MergeIdenticalEdges) {
-  const Function *F = FirstBB->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  // Generate edges and read current weight.
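  // (Editor's aside, an illustrative example rather than original text: with
  // MergeIdenticalEdges false, two identical FirstBB->SecondBB edges and a
  // brand-new NewBB give succ_count == 3, so for w == 30 a floor(30/3) == 10
  // share is routed over NewBB and 20 stays on the original edge.)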
-  Edge e = getEdge(FirstBB, SecondBB);
-  Edge n1 = getEdge(FirstBB, NewBB);
-  Edge n2 = getEdge(NewBB, SecondBB);
-  EdgeWeights &ECs = J->second;
-  double w = ECs[e];
-
-  int succ_count = 0;
-  if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB; if there is more than
-    // one, only slice out a proportional part for NewBB.
-    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
-        BBI != BBE; ++BBI) {
-      if (*BBI == SecondBB) succ_count++;
-    }
-    // When the NewBB is completely new, increment the count by one so that
-    // the counts are properly distributed.
-    if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
-  } else {
-    // When the edges are merged anyway, redirect all flow.
-    succ_count = 1;
-  }
-
-  // We now know how many edges there are from FirstBB to SecondBB; reroute a
-  // proportional part of the edge weight over NewBB.
-  double neww = floor(w / succ_count);
-  ECs[n1] += neww;
-  ECs[n2] += neww;
-  BlockInformation[F][NewBB] += neww;
-  if (succ_count == 1) {
-    ECs.erase(e);
-  } else {
-    ECs[e] -= neww;
-  }
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
-                                                   const BasicBlock* New) {
-  const Function *F = Old->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
-
-  std::set<Edge> Edges;
-  for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
-       ewi != ewe; ++ewi) {
-    Edge old = ewi->first;
-    if (old.first == Old) {
-      Edges.insert(old);
-    }
-  }
-  for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
-       EI != EE; ++EI) {
-    Edge newedge = getEdge(New, EI->second);
-    replaceEdge(*EI, newedge);
-  }
-
-  double w = getExecutionCount(Old);
-  setEdgeWeight(getEdge(Old, New), w);
-  setExecutionCount(New, w);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
-                                                   const BasicBlock* NewBB,
-                                                   BasicBlock *const *Preds,
-                                                   unsigned NumPreds) {
-  const Function *F = BB->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
-        << " to " << NewBB->getName() << "\n");
-
-  // Collect the weight that was redirected over NewBB.
-  double newweight = 0;
-
-  std::set<const BasicBlock *> ProcessedPreds;
-  // For all requested predecessors.
-  for (unsigned pred = 0; pred < NumPreds; ++pred) {
-    const BasicBlock * Pred = Preds[pred];
-    if (ProcessedPreds.insert(Pred).second) {
-      // Create edges and read the old weight.
-      Edge oldedge = getEdge(Pred, BB);
-      Edge newedge = getEdge(Pred, NewBB);
-
-      // Remember how much weight was redirected.
- newweight += getEdgeWeight(oldedge); - - replaceEdge(oldedge,newedge); - } - } - - Edge newedge = getEdge(NewBB,BB); - setEdgeWeight(newedge, newweight); - setExecutionCount(NewBB, newweight); -} - -template<> -void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old, - const Function *New) { - DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with " - << New->getName() << "\n"); - std::map<const Function*, EdgeWeights>::iterator J = - EdgeInformation.find(Old); - if(J != EdgeInformation.end()) { - EdgeInformation[New] = J->second; - } - EdgeInformation.erase(Old); - BlockInformation.erase(Old); - FunctionInformation.erase(Old); -} - -static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, - ProfileInfo::Edge &tocalc, unsigned &uncalc) { - if (w == ProfileInfo::MissingValue) { - tocalc = edge; - uncalc++; - return 0; - } else { - return w; - } -} - -template<> -bool ProfileInfoT<Function,BasicBlock>:: - CalculateMissingEdge(const BasicBlock *BB, Edge &removed, - bool assumeEmptySelf) { - Edge edgetocalc; - unsigned uncalculated = 0; - - // collect weights of all incoming and outgoing edges, rememer edges that - // have no value - double incount = 0; - SmallSet<const BasicBlock*,8> pred_visited; - const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi==bbe) { - Edge e = getEdge(0,BB); - incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); - } - for (;bbi != bbe; ++bbi) { - if (pred_visited.insert(*bbi)) { - Edge e = getEdge(*bbi,BB); - incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); - } - } - - double outcount = 0; - SmallSet<const BasicBlock*,8> succ_visited; - succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi==sbbe) { - Edge e = getEdge(BB,0); - if (getEdgeWeight(e) == MissingValue) { - double w = getExecutionCount(BB); - if (w != MissingValue) { - setEdgeWeight(e,w); - removed = e; - } - } - outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); - } - for (;sbbi != sbbe; ++sbbi) { - if (succ_visited.insert(*sbbi)) { - Edge e = getEdge(BB,*sbbi); - outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); - } - } - - // if exactly one edge weight was missing, calculate it and remove it from - // spanning tree - if (uncalculated == 0 ) { - return true; - } else - if (uncalculated == 1) { - if (incount < outcount) { - EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; - } else { - EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; - } - DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": " - << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); - removed = edgetocalc; - return true; - } else - if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { - setEdgeWeight(edgetocalc, incount * 10); - removed = edgetocalc; - return true; - } else { - return false; - } -} - -static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) { - double w = PI->getEdgeWeight(e); - if (w != ProfileInfo::MissingValue) { - calcw += w; - } else { - misscount.insert(e); - } -} - -template<> -bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { - double inWeight = 0; - std::set<Edge> inMissing; - std::set<const BasicBlock*> ProcessedPreds; - const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi == bbe) { - 
readEdge(this,getEdge(0,BB),inWeight,inMissing); - } - for( ; bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); - } - } - - double outWeight = 0; - std::set<Edge> outMissing; - std::set<const BasicBlock*> ProcessedSuccs; - succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi == sbbe) - readEdge(this,getEdge(BB,0),outWeight,outMissing); - for ( ; sbbi != sbbe; ++sbbi ) { - if (ProcessedSuccs.insert(*sbbi).second) { - readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); - } - } - - double share; - std::set<Edge>::iterator ei,ee; - if (inMissing.size() == 0 && outMissing.size() > 0) { - ei = outMissing.begin(); - ee = outMissing.end(); - share = inWeight/outMissing.size(); - setExecutionCount(BB,inWeight); - } else - if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) { - ei = inMissing.begin(); - ee = inMissing.end(); - share = 0; - setExecutionCount(BB,0); - } else - if (inMissing.size() == 0 && outMissing.size() == 0) { - setExecutionCount(BB,outWeight); - return true; - } else { - return false; - } - for ( ; ei != ee; ++ei ) { - setEdgeWeight(*ei,share); - } - return true; -} - -template<> -void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { -// if (getExecutionCount(&(F->getEntryBlock())) == 0) { -// for (Function::const_iterator FI = F->begin(), FE = F->end(); -// FI != FE; ++FI) { -// const BasicBlock* BB = &(*FI); -// { -// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); -// if (NBB == End) { -// setEdgeWeight(getEdge(0,BB),0); -// } -// for(;NBB != End; ++NBB) { -// setEdgeWeight(getEdge(*NBB,BB),0); -// } -// } -// { -// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); -// if (NBB == End) { -// setEdgeWeight(getEdge(0,BB),0); -// } -// for(;NBB != End; ++NBB) { -// setEdgeWeight(getEdge(*NBB,BB),0); -// } -// } -// } -// return; -// } - // The set of BasicBlocks that are still unvisited. - std::set<const BasicBlock*> Unvisited; - - // The set of return edges (Edges with no successors). - std::set<Edge> ReturnEdges; - double ReturnWeight = 0; - - // First iterate over the whole function and collect: - // 1) The blocks in this function in the Unvisited set. - // 2) The return edges in the ReturnEdges set. - // 3) The flow that is leaving the function already via return edges. - - // Data structure for searching the function. - std::queue<const BasicBlock *> BFS; - const BasicBlock *BB = &(F->getEntryBlock()); - BFS.push(BB); - Unvisited.insert(BB); - - while (BFS.size()) { - BB = BFS.front(); BFS.pop(); - succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); - if (NBB == End) { - Edge e = getEdge(BB,0); - double w = getEdgeWeight(e); - if (w == MissingValue) { - // If the return edge has no value, try to read value from block. - double bw = getExecutionCount(BB); - if (bw != MissingValue) { - setEdgeWeight(e,bw); - ReturnWeight += bw; - } else { - // If both return edge and block provide no value, collect edge. - ReturnEdges.insert(e); - } - } else { - // If the return edge has a proper value, collect it. - ReturnWeight += w; - } - } - for (;NBB != End; ++NBB) { - if (Unvisited.insert(*NBB).second) { - BFS.push(*NBB); - } - } - } - - while (Unvisited.size() > 0) { - unsigned oldUnvisitedCount = Unvisited.size(); - bool FoundPath = false; - - // If there is only one edge left, calculate it. 
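    // (Editor's aside, illustrating the conservation argument used below: all
    // flow entering at the function entry must leave through some return
    // edge, so with an entry count of 100 and 90 units already seen on known
    // return edges, the single remaining return edge must carry 100 - 90 = 10.)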
- if (ReturnEdges.size() == 1) { - ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight; - - Edge e = *ReturnEdges.begin(); - setEdgeWeight(e,ReturnWeight); - setExecutionCount(e.first,ReturnWeight); - - Unvisited.erase(e.first); - ReturnEdges.erase(e); - continue; - } - - // Calculate all blocks where only one edge is missing, this may also - // resolve furhter return edges. - std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE) { - const BasicBlock *BB = *FI; ++FI; - Edge e; - if(CalculateMissingEdge(BB,e,true)) { - if (BlockInformation[F].find(BB) == BlockInformation[F].end()) { - setExecutionCount(BB,getExecutionCount(BB)); - } - Unvisited.erase(BB); - if (e.first != 0 && e.second == 0) { - ReturnEdges.erase(e); - ReturnWeight += getEdgeWeight(e); - } - } - } - if (oldUnvisitedCount > Unvisited.size()) continue; - - // Estimate edge weights by dividing the flow proportionally. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE) { - const BasicBlock *BB = *FI; ++FI; - const BasicBlock *Dest = 0; - bool AllEdgesHaveSameReturn = true; - // Check each Successor, these must all end up in the same or an empty - // return block otherwise its dangerous to do an estimation on them. - for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); - Succ != End; ++Succ) { - Path P; - GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[(const BasicBlock*)0]) { - AllEdgesHaveSameReturn = false; - } - Dest = P[(const BasicBlock*)0]; - } - if (AllEdgesHaveSameReturn) { - if(EstimateMissingEdges(BB)) { - Unvisited.erase(BB); - break; - } - } - } - if (oldUnvisitedCount > Unvisited.size()) continue; - - // Check if there is a path to an block that has a known value and redirect - // flow accordingly. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - // Fetch path. - const BasicBlock *BB = *FI; ++FI; - Path P; - const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue); - - // Calculate incoming flow. - double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; - std::set<const BasicBlock *> Processed; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(*NBB, BB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - iw += ew; - invalid++; - } else { - // If the path contains the successor, this means its a backedge, - // do not count as missing. - if (P.find(*NBB) == P.end()) - inmissing++; - } - incount++; - } - } - if (inmissing == incount) continue; - if (invalid == 0) continue; - - // Subtract (already) outgoing flow. - Processed.clear(); - for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(BB, *NBB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - iw -= ew; - } - } - } - if (iw < 0) continue; - - // Check the receiving end of the path if it can handle the flow. - double ow = getExecutionCount(Dest); - Processed.clear(); - for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(BB, *NBB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - ow -= ew; - } - } - } - if (ow < 0) continue; - - // Determine how much flow shall be used. 
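      // (Editor's aside: the lines below effectively compute
      //    ew = min(ew, iw, ow)
      // so the flow pushed along the path exceeds neither the flow still
      // unaccounted for at the source block (iw) nor what the receiving block
      // can still absorb (ow); when the path's last edge has no weight yet,
      // the full spare incoming flow iw is used, provided no incoming edge is
      // missing.)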
- double ew = getEdgeWeight(getEdge(P[Dest],Dest)); - if (ew != MissingValue) { - ew = ew<ow?ew:ow; - ew = ew<iw?ew:iw; - } else { - if (inmissing == 0) - ew = iw; - } - - // Create flow. - if (ew != MissingValue) { - do { - Edge e = getEdge(P[Dest],Dest); - if (getEdgeWeight(e) == MissingValue) { - setEdgeWeight(e,ew); - FoundPath = true; - } - Dest = P[Dest]; - } while (Dest != BB); - } - } - if (FoundPath) continue; - - // Calculate a block with self loop. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - bool SelfEdgeFound = false; - for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); - NBB != End; ++NBB) { - if (*NBB == BB) { - SelfEdgeFound = true; - break; - } - } - if (SelfEdgeFound) { - Edge e = getEdge(BB,BB); - if (getEdgeWeight(e) == MissingValue) { - double iw = 0; - std::set<const BasicBlock *> Processed; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(*NBB, BB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - iw += ew; - } - } - } - setEdgeWeight(e,iw * 10); - FoundPath = true; - } - } - } - if (FoundPath) continue; - - // Determine backedges, set them to zero. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - const BasicBlock *Dest = 0; - Path P; - bool BackEdgeFound = false; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); - if (Dest == *NBB) { - BackEdgeFound = true; - break; - } - } - if (BackEdgeFound) { - Edge e = getEdge(Dest,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - } - do { - Edge e = getEdge(P[Dest], Dest); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - } - Dest = P[Dest]; - } while (Dest != BB); - } - } - if (FoundPath) continue; - - // Channel flow to return block. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - - Path P; - const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[(const BasicBlock*)0]; - if (!Dest) continue; - - if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { - // Calculate incoming flow. - double iw = 0; - std::set<const BasicBlock *> Processed; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(*NBB, BB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - iw += ew; - } - } - } - do { - Edge e = getEdge(P[Dest], Dest); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,iw); - FoundPath = true; - } else { - assert(0 && "Edge should not have value already!"); - } - Dest = P[Dest]; - } while (Dest != BB); - } - } - if (FoundPath) continue; - - // Speculatively set edges to zero. 
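    // (Editor's aside: this is the last heuristic before the repair gives up;
    // zeroing one missing incoming edge per iteration guarantees that the
    // outer loop makes progress even when none of the principled rules above
    // applied.)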
- FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - Edge e = getEdge(*NBB,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - break; - } - } - } - if (FoundPath) continue; - - errs() << "{"; - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE) { - const BasicBlock *BB = *FI; ++FI; - dbgs() << BB->getName(); - if (FI != FE) - dbgs() << ","; - } - errs() << "}"; - - errs() << "ASSERT: could not repair function"; - assert(0 && "could not repair function"); - } - - EdgeWeights J = EdgeInformation[F]; - for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { - Edge e = EI->first; - - bool SuccFound = false; - if (e.first != 0) { - succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); - if (NBB == End) { - if (0 == e.second) { - SuccFound = true; - } - } - for (;NBB != End; ++NBB) { - if (*NBB == e.second) { - SuccFound = true; - break; - } - } - if (!SuccFound) { - removeEdge(e); - } - } - } -} - -raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { - return O << MF->getFunction()->getName() << "(MF)"; -} - -raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { - return O << MBB->getBasicBlock()->getName() << "(MB)"; -} - -raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - -} // namespace llvm - -//===----------------------------------------------------------------------===// -// NoProfile ProfileInfo implementation -// - -namespace { - struct NoProfileInfo : public ImmutablePass, public ProfileInfo { - static char ID; // Class identification, replacement for typeinfo - NoProfileInfo() : ImmutablePass(ID) { - initializeNoProfileInfoPass(*PassRegistry::getPassRegistry()); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &ProfileInfo::ID) - return (ProfileInfo*)this; - return this; - } - - virtual const char *getPassName() const { - return "NoProfileInfo"; - } - }; -} // End of anonymous namespace - -char NoProfileInfo::ID = 0; -// Register this pass... -INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile", - "No Profile Information", false, true, true) - -ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp deleted file mode 100644 index f1f3e940..0000000 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// The ProfileInfoLoader class is used to load and represent profiling -// information read in from the dump file. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdio> -#include <cstdlib> -using namespace llvm; - -// ByteSwap - Byteswap 'Var' if 'Really' is true. -// -static inline unsigned ByteSwap(unsigned Var, bool Really) { - if (!Really) return Var; - return ((Var & (255U<< 0U)) << 24U) | - ((Var & (255U<< 8U)) << 8U) | - ((Var & (255U<<16U)) >> 8U) | - ((Var & (255U<<24U)) >> 24U); -} - -static unsigned AddCounts(unsigned A, unsigned B) { - // If either value is undefined, use the other. - if (A == ProfileInfoLoader::Uncounted) return B; - if (B == ProfileInfoLoader::Uncounted) return A; - return A + B; -} - -static void ReadProfilingBlock(const char *ToolName, FILE *F, - bool ShouldByteSwap, - std::vector<unsigned> &Data) { - // Read the number of entries... - unsigned NumEntries; - if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { - errs() << ToolName << ": data packet truncated!\n"; - perror(0); - exit(1); - } - NumEntries = ByteSwap(NumEntries, ShouldByteSwap); - - // Read the counts... - std::vector<unsigned> TempSpace(NumEntries); - - // Read in the block of data... - if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { - errs() << ToolName << ": data packet truncated!\n"; - perror(0); - exit(1); - } - - // Make sure we have enough space... The space is initialised to -1 to - // facitiltate the loading of missing values for OptimalEdgeProfiling. - if (Data.size() < NumEntries) - Data.resize(NumEntries, ProfileInfoLoader::Uncounted); - - // Accumulate the data we just read into the data. - if (!ShouldByteSwap) { - for (unsigned i = 0; i != NumEntries; ++i) { - Data[i] = AddCounts(TempSpace[i], Data[i]); - } - } else { - for (unsigned i = 0; i != NumEntries; ++i) { - Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]); - } - } -} - -const unsigned ProfileInfoLoader::Uncounted = ~0U; - -// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the -// program if the file is invalid or broken. -// -ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, - const std::string &Filename) - : Filename(Filename) { - FILE *F = fopen(Filename.c_str(), "rb"); - if (F == 0) { - errs() << ToolName << ": Error opening '" << Filename << "': "; - perror(0); - exit(1); - } - - // Keep reading packets until we run out of them. - unsigned PacketType; - while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { - // If the low eight bits of the packet are zero, we must be dealing with an - // endianness mismatch. Byteswap all words read from the profiling - // information. - bool ShouldByteSwap = (char)PacketType == 0; - PacketType = ByteSwap(PacketType, ShouldByteSwap); - - switch (PacketType) { - case ArgumentInfo: { - unsigned ArgLength; - if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { - errs() << ToolName << ": arguments packet truncated!\n"; - perror(0); - exit(1); - } - ArgLength = ByteSwap(ArgLength, ShouldByteSwap); - - // Read in the arguments... 
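The loader's byte-order handling is self-contained enough to exercise in isolation: packet types are small integers, so a word written on an opposite-endian host reads back with a zero low byte, which is exactly the constructor's detection test above. A small self-checking sketch (a stand-alone program, not part of the loader):

#include <cassert>
#include <cstdio>

// The same rotation as the loader's ByteSwap: reverse the four bytes of
// a 32-bit word when 'Really' is set.
static inline unsigned ByteSwap(unsigned Var, bool Really) {
  if (!Really) return Var;
  return ((Var & 0x000000FFU) << 24) |
         ((Var & 0x0000FF00U) <<  8) |
         ((Var & 0x00FF0000U) >>  8) |
         ((Var & 0xFF000000U) >> 24);
}

int main() {
  assert(ByteSwap(0x11223344U, true) == 0x44332211U);

  // Packet types are small, so a same-endian write keeps the value in
  // the low byte; after a cross-endian round trip the low byte reads as
  // zero, which is the detection test used in the constructor above.
  unsigned SameEndian = 3;                  // e.g. an EdgeInfo packet
  unsigned CrossEndian = ByteSwap(3, true); // as a foreign host sees it
  assert((char)SameEndian != 0);
  assert((char)CrossEndian == 0);
  std::puts("byte-swap round trip OK");
  return 0;
}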
- std::vector<char> Chars(ArgLength+4); - - if (ArgLength) - if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { - errs() << ToolName << ": arguments packet truncated!\n"; - perror(0); - exit(1); - } - CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength])); - break; - } - - case FunctionInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts); - break; - - case BlockInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts); - break; - - case EdgeInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); - break; - - case OptEdgeInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); - break; - - case BBTraceInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); - break; - - default: - errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; - exit(1); - } - } - - fclose(F); -} - diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp deleted file mode 100644 index 346f8d6..0000000 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp +++ /dev/null @@ -1,267 +0,0 @@ -//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a concrete implementation of profiling information that -// loads the information from a profile dump file. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-loader" -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include <set> -using namespace llvm; - -STATISTIC(NumEdgesRead, "The # of edges read."); - -static cl::opt<std::string> -ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Profile file loaded by -profile-loader")); - -namespace { - class LoaderPass : public ModulePass, public ProfileInfo { - std::string Filename; - std::set<Edge> SpanningTree; - std::set<const BasicBlock*> BBisUnvisited; - unsigned ReadCount; - public: - static char ID; // Class identification, replacement for typeinfo - explicit LoaderPass(const std::string &filename = "") - : ModulePass(ID), Filename(filename) { - initializeLoaderPassPass(*PassRegistry::getPassRegistry()); - if (filename.empty()) Filename = ProfileInfoFilename; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - - virtual const char *getPassName() const { - return "Profiling information loader"; - } - - // recurseBasicBlock() - Calculates the edge weights for as much basic - // blocks as possbile. 
- virtual void recurseBasicBlock(const BasicBlock *BB); - virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &); - virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&); - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &ProfileInfo::ID) - return (ProfileInfo*)this; - return this; - } - - /// run - Load the profile information from the specified file. - virtual bool runOnModule(Module &M); - }; -} // End of anonymous namespace - -char LoaderPass::ID = 0; -INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader", - "Load profile information from llvmprof.out", false, true, false) - -char &llvm::ProfileLoaderPassID = LoaderPass::ID; - -ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } - -/// createProfileLoaderPass - This function returns a Pass that loads the -/// profiling information for the module from the specified filename, making it -/// available to the optimizers. -Pass *llvm::createProfileLoaderPass(const std::string &Filename) { - return new LoaderPass(Filename); -} - -void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc, - unsigned &uncalc, double &count) { - double w; - if ((w = getEdgeWeight(edge)) == MissingValue) { - tocalc = edge; - uncalc++; - } else { - count+=w; - } -} - -// recurseBasicBlock - Visits all neighbours of a block and then tries to -// calculate the missing edge values. -void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { - - // break recursion if already visited - if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return; - BBisUnvisited.erase(BB); - if (!BB) return; - - for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi) { - recurseBasicBlock(*bbi); - } - for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi) { - recurseBasicBlock(*bbi); - } - - Edge tocalc; - if (CalculateMissingEdge(BB, tocalc)) { - SpanningTree.erase(tocalc); - } -} - -void LoaderPass::readEdge(ProfileInfo::Edge e, - std::vector<unsigned> &ECs) { - if (ReadCount < ECs.size()) { - double weight = ECs[ReadCount++]; - if (weight != ProfileInfoLoader::Uncounted) { - // Here the data realm changes from the unsigned of the file to the - // double of the ProfileInfo. This conversion is save because we know - // that everything thats representable in unsinged is also representable - // in double. - EdgeInformation[getFunction(e)][e] += (double)weight; - - DEBUG(dbgs() << "--Read Edge Counter for " << e - << " (# "<< (ReadCount-1) << "): " - << (unsigned)getEdgeWeight(e) << "\n"); - } else { - // This happens only if reading optimal profiling information, not when - // reading regular profiling information. 
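The remark in readEdge above, that the unsigned-to-double conversion is safe, rests on a standard numeric fact: an IEEE-754 double carries a 53-bit significand, so every value of a 32-bit unsigned counter converts exactly, with no rounding. Assuming the usual IEEE-754 double, the claim can be checked at compile time:

#include <limits>

// Every value of a 32-bit counter fits in double's 53-bit significand,
// so unsigned -> double -> unsigned round-trips without rounding.
static_assert(std::numeric_limits<double>::digits >= 53,
              "IEEE-754 double expected");
static_assert(std::numeric_limits<unsigned>::digits <= 53,
              "counter is wider than double's significand");

int main() {
  unsigned Max = 0xFFFFFFFFU;
  double D = static_cast<double>(Max); // exact
  return static_cast<unsigned>(D) == Max ? 0 : 1;
}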
- SpanningTree.insert(e); - } - } -} - -bool LoaderPass::runOnModule(Module &M) { - ProfileInfoLoader PIL("profile-loader", Filename); - - EdgeInformation.clear(); - std::vector<unsigned> Counters = PIL.getRawEdgeCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - readEdge(getEdge(0,&F->getEntryBlock()), Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); - } - } - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - } - - Counters = PIL.getRawOptimalEdgeCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - readEdge(getEdge(0,&F->getEntryBlock()), Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - readEdge(getEdge(BB,0), Counters); - } - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); - } - } - while (SpanningTree.size() > 0) { - - unsigned size = SpanningTree.size(); - - BBisUnvisited.clear(); - for (std::set<Edge>::iterator ei = SpanningTree.begin(), - ee = SpanningTree.end(); ei != ee; ++ei) { - BBisUnvisited.insert(ei->first); - BBisUnvisited.insert(ei->second); - } - while (BBisUnvisited.size() > 0) { - recurseBasicBlock(*BBisUnvisited.begin()); - } - - if (SpanningTree.size() == size) { - DEBUG(dbgs()<<"{"); - for (std::set<Edge>::iterator ei = SpanningTree.begin(), - ee = SpanningTree.end(); ei != ee; ++ei) { - DEBUG(dbgs()<< *ei <<","); - } - assert(0 && "No edge calculated!"); - } - - } - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - } - - BlockInformation.clear(); - Counters = PIL.getRawBlockCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (ReadCount < Counters.size()) - // Here the data realm changes from the unsigned of the file to the - // double of the ProfileInfo. This conversion is save because we know - // that everything thats representable in unsinged is also - // representable in double. - BlockInformation[F][BB] = (double)Counters[ReadCount++]; - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - } - - FunctionInformation.clear(); - Counters = PIL.getRawFunctionCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - if (ReadCount < Counters.size()) - // Here the data realm changes from the unsigned of the file to the - // double of the ProfileInfo. 
This conversion is save because we know - // that everything thats representable in unsinged is also - // representable in double. - FunctionInformation[F] = (double)Counters[ReadCount++]; - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - } - - return false; -} diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp deleted file mode 100644 index c8896de..0000000 --- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp +++ /dev/null @@ -1,383 +0,0 @@ -//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that checks profiling information for -// plausibility. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-verifier" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/raw_ostream.h" -#include <set> -using namespace llvm; - -static cl::opt<bool,false> -ProfileVerifierDisableAssertions("profile-verifier-noassert", - cl::desc("Disable assertions")); - -namespace { - template<class FType, class BType> - class ProfileVerifierPassT : public FunctionPass { - - struct DetailedBlockInfo { - const BType *BB; - double BBWeight; - double inWeight; - int inCount; - double outWeight; - int outCount; - }; - - ProfileInfoT<FType, BType> *PI; - std::set<const BType*> BBisVisited; - std::set<const FType*> FisVisited; - bool DisableAssertions; - - // When debugging is enabled, the verifier prints a whole slew of debug - // information, otherwise its just the assert. These are all the helper - // functions. - bool PrintedDebugTree; - std::set<const BType*> BBisPrinted; - void debugEntry(DetailedBlockInfo*); - void printDebugInfo(const BType *BB); - - public: - static char ID; // Class identification, replacement for typeinfo - - explicit ProfileVerifierPassT () : FunctionPass(ID) { - initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); - DisableAssertions = ProfileVerifierDisableAssertions; - } - explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), - DisableAssertions(da) { - initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired<ProfileInfoT<FType, BType> >(); - } - - const char *getPassName() const { - return "Profiling information verifier"; - } - - /// run - Verify the profile information. 
- bool runOnFunction(FType &F); - void recurseBasicBlock(const BType*); - - bool exitReachable(const FType*); - double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge); - void CheckValue(bool, const char*, DetailedBlockInfo*); - }; - - typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass; - - template<class FType, class BType> - void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) { - - if (BBisPrinted.find(BB) != BBisPrinted.end()) return; - - double BBWeight = PI->getExecutionCount(BB); - if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; } - double inWeight = 0; - int inCount = 0; - std::set<const BType*> ProcessedPreds; - for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } - dbgs() << "calculated in-edge " << E << ": " - << format("%20.20g",EdgeWeight) << "\n"; - inWeight += EdgeWeight; - inCount++; - } - } - double outWeight = 0; - int outCount = 0; - std::set<const BType*> ProcessedSuccs; - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedSuccs.insert(*bbi).second) { - typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } - dbgs() << "calculated out-edge " << E << ": " - << format("%20.20g",EdgeWeight) << "\n"; - outWeight += EdgeWeight; - outCount++; - } - } - dbgs() << "Block " << BB->getName() << " in " - << BB->getParent()->getName() << ":" - << "BBWeight=" << format("%20.20g",BBWeight) << "," - << "inWeight=" << format("%20.20g",inWeight) << "," - << "inCount=" << inCount << "," - << "outWeight=" << format("%20.20g",outWeight) << "," - << "outCount" << outCount << "\n"; - - // mark as visited and recurse into subnodes - BBisPrinted.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - printDebugInfo(*bbi); - } - } - - template<class FType, class BType> - void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { - dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in " - << DI->BB->getParent()->getName() << ":" - << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," - << "inWeight=" << format("%20.20g",DI->inWeight) << "," - << "inCount=" << DI->inCount << "," - << "outWeight=" << format("%20.20g",DI->outWeight) << "," - << "outCount=" << DI->outCount << "\n"; - if (!PrintedDebugTree) { - PrintedDebugTree = true; - printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); - } - } - - // This compares A and B for equality. - static bool Equals(double A, double B) { - return A == B; - } - - // This checks if the function "exit" is reachable from an given function - // via calls, this is necessary to check if a profile is valid despite the - // counts not fitting exactly. 
- template<class FType, class BType> - bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) { - if (!F) return false; - - if (FisVisited.count(F)) return false; - - FType *Exit = F->getParent()->getFunction("exit"); - if (Exit == F) { - return true; - } - - FisVisited.insert(F); - bool exits = false; - for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { - FType *F = CI->getCalledFunction(); - if (F) { - exits |= exitReachable(F); - } else { - // This is a call to a pointer, all bets are off... - exits = true; - } - if (exits) break; - } - } - return exits; - } - - #define ASSERTMESSAGE(M) \ - { dbgs() << "ASSERT:" << (M) << "\n"; \ - if (!DisableAssertions) assert(0 && (M)); } - - template<class FType, class BType> - double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) { - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { - dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; - ASSERTMESSAGE("Edge has missing value"); - return 0; - } else { - if (EdgeWeight < 0) { - dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; - ASSERTMESSAGE("Edge has negative value"); - } - return EdgeWeight; - } - } - - template<class FType, class BType> - void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, - const char *Message, - DetailedBlockInfo *DI) { - if (Error) { - DEBUG(debugEntry(DI)); - dbgs() << "Block " << DI->BB->getName() << " in Function " - << DI->BB->getParent()->getName() << ": "; - ASSERTMESSAGE(Message); - } - return; - } - - // This calculates the Information for a block and then recurses into the - // successors. - template<class FType, class BType> - void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) { - - // Break the recursion by remembering all visited blocks. - if (BBisVisited.find(BB) != BBisVisited.end()) return; - - // Use a data structure to store all the information, this can then be handed - // to debug printers. - DetailedBlockInfo DI; - DI.BB = BB; - DI.outCount = DI.inCount = 0; - DI.inWeight = DI.outWeight = 0; - - // Read predecessors. - std::set<const BType*> ProcessedPreds; - const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); - // If there are none, check for (0,BB) edge. - if (bpi == bpe) { - DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); - DI.inCount++; - } - for (;bpi != bpe; ++bpi) { - if (ProcessedPreds.insert(*bpi).second) { - DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); - DI.inCount++; - } - } - - // Read successors. - std::set<const BType*> ProcessedSuccs; - succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - // If there is an (0,BB) edge, consider it too. (This is done not only when - // there are no successors, but every time; not every function contains - // return blocks with no successors (think loop latch as return block)). - double w = PI->getEdgeWeight(PI->getEdge(BB,0)); - if (w != ProfileInfoT<FType, BType>::MissingValue) { - DI.outWeight += w; - DI.outCount++; - } - for (;bbi != bbe; ++bbi) { - if (ProcessedSuccs.insert(*bbi).second) { - DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); - DI.outCount++; - } - } - - // Read block weight. 
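Stripped of pass plumbing, the invariant recurseBasicBlock enforces (here and in the block-weight checks that follow) is flow conservation with two sanctioned exceptions: a block that may call exit() is allowed to lose flow, and a setjmp target is allowed to gain it. A toy model of that decision with hypothetical field names, not the verifier's DetailedBlockInfo:

#include <cstdio>

// A toy model of one block's bookkeeping; the field names are ad-hoc.
struct BlockInfo {
  double BBWeight, InWeight, OutWeight;
  bool MayCallExit;    // flow may vanish here: exit() never returns
  bool IsSetJmpTarget; // flow may appear here: longjmp re-enters
};

static bool plausible(const BlockInfo &B) {
  if (B.InWeight > B.OutWeight) // losing flow is only fine via exit()
    return B.MayCallExit;
  if (B.OutWeight > B.InWeight) // gaining flow is only fine via setjmp
    return B.IsSetJmpTarget;
  return B.InWeight == B.BBWeight; // balanced: all three must agree
}

int main() {
  BlockInfo Plain = {100, 100, 100, false, false};
  BlockInfo CallsExit = {100, 100, 97, true, false}; // 3 runs exited here
  BlockInfo Broken = {100, 100, 97, false, false};
  std::printf("%d %d %d\n", plausible(Plain), plausible(CallsExit),
              plausible(Broken)); // prints: 1 1 0
  return 0;
}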
- DI.BBWeight = PI->getExecutionCount(BB); - CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue, - "BasicBlock has missing value", &DI); - CheckValue(DI.BBWeight < 0, - "BasicBlock has negative value", &DI); - - // Check if this block is a setjmp target. - bool isSetJmpTarget = false; - if (DI.outWeight > DI.inWeight) { - for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - FType *F = CI->getCalledFunction(); - if (F && (F->getName() == "_setjmp")) { - isSetJmpTarget = true; break; - } - } - } - } - // Check if this block is eventually reaching exit. - bool isExitReachable = false; - if (DI.inWeight > DI.outWeight) { - for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - FType *F = CI->getCalledFunction(); - if (F) { - FisVisited.clear(); - isExitReachable |= exitReachable(F); - } else { - // This is a call to a pointer, all bets are off... - isExitReachable = true; - } - if (isExitReachable) break; - } - } - } - - if (DI.inCount > 0 && DI.outCount == 0) { - // If this is a block with no successors. - if (!isSetJmpTarget) { - CheckValue(!Equals(DI.inWeight,DI.BBWeight), - "inWeight and BBWeight do not match", &DI); - } - } else if (DI.inCount == 0 && DI.outCount > 0) { - // If this is a block with no predecessors. - if (!isExitReachable) - CheckValue(!Equals(DI.BBWeight,DI.outWeight), - "BBWeight and outWeight do not match", &DI); - } else { - // If this block has successors and predecessors. - if (DI.inWeight > DI.outWeight && !isExitReachable) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "inWeight and outWeight do not match", &DI); - if (DI.inWeight < DI.outWeight && !isSetJmpTarget) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "inWeight and outWeight do not match", &DI); - } - - - // Mark this block as visited, rescurse into successors. - BBisVisited.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - recurseBasicBlock(*bbi); - } - } - - template<class FType, class BType> - bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) { - PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >(); - if (!PI) - ASSERTMESSAGE("No ProfileInfo available"); - - // Prepare global variables. - PrintedDebugTree = false; - BBisVisited.clear(); - - // Fetch entry block and recurse into it. - const BType *entry = &F.getEntryBlock(); - recurseBasicBlock(entry); - - if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) - ASSERTMESSAGE("Function count and entry block count do not match"); - - return false; - } - - template<class FType, class BType> - char ProfileVerifierPassT<FType, BType>::ID = 0; -} - -INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier", - "Verify profiling information", false, true) -INITIALIZE_AG_DEPENDENCY(ProfileInfo) -INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier", - "Verify profiling information", false, true) - -namespace llvm { - FunctionPass *createProfileVerifierPass() { - return new ProfileVerifierPass(ProfileVerifierDisableAssertions); - } -} - diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 8577025..5635688 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -9,6 +9,7 @@ // Detects single entry single exit regions in the control flow graph. 
//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "region" #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" @@ -17,12 +18,9 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" - -#define DEBUG_TYPE "region" #include "llvm/Support/Debug.h" - -#include <set> #include <algorithm> +#include <set> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index f876748..0a02f4e 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -585,6 +585,9 @@ namespace { // Lexicographically compare n-ary expressions. unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + for (unsigned i = 0; i != LNumOps; ++i) { if (i >= RNumOps) return 1; @@ -758,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, unsigned CalculationBits = W + T; // Calculate 2^T, at width T+W. - APInt DivFactor = APInt(CalculationBits, 1).shl(T); + APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by @@ -1380,7 +1383,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// static bool CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, - SmallVector<const SCEV *, 8> &NewOps, + SmallVectorImpl<const SCEV *> &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, @@ -1628,7 +1631,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; - for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(), + for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. @@ -2587,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. 
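Two of the ScalarEvolution changes above are simple local strengthenings: APInt::getOneBitSet(W, T) produces the same value as APInt(W, 1).shl(T), namely 2^T, and comparing n-ary expressions by operand count before content is the usual length-first short-circuit for a lexicographic order. Both are easy to mirror with plain integers (a sketch, not LLVM's APInt):

#include <cassert>
#include <cstdint>
#include <vector>

// Plain-integer analogue of APInt::getOneBitSet(W, T): the value 2^T,
// built directly instead of shifting a freshly constructed 1.
static uint64_t oneBitSet(unsigned T) {
  assert(T < 64 && "shift amount must be in range");
  return uint64_t(1) << T;
}

// Length-first lexicographic compare, as in the n-ary SCEV ordering
// above: a shorter operand list orders before any longer one, so the
// element walk only runs when the lengths match.
static int compareNAry(const std::vector<int> &L, const std::vector<int> &R) {
  if (L.size() != R.size())
    return (int)L.size() - (int)R.size();
  for (unsigned i = 0; i != L.size(); ++i)
    if (L[i] != R[i])
      return L[i] < R[i] ? -1 : 1;
  return 0;
}

int main() {
  assert(oneBitSet(5) == 32);
  std::vector<int> A, B;
  A.push_back(1); A.push_back(2);
  B.push_back(1); B.push_back(2); B.push_back(3);
  assert(compareNAry(A, B) < 0); // decided by length alone
  return 0;
}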
if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getTypeAllocSize(AllocTy)); + return getConstant(IntTy, TD->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + assert(Ty == IntTy && "Effective SCEV type doesn't match"); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { - Constant *C = ConstantExpr::getAlignOf(AllocTy); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} - -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, + StructType *STy, unsigned FieldNo) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - if (TD) - return getConstant(TD->getIntPtrType(getContext()), + if (TD) { + return getConstant(IntTy, TD->getStructLayout(STy)->getElementOffset(FieldNo)); + } Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} -const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, - Constant *FieldNo) { - Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2700,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isIntegerTy()) + if (Ty->isIntegerTy()) { return Ty; + } // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) return TD->getIntPtrType(getContext()); + + if (TD) + return TD->getIntPtrType(Ty); // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); @@ -2715,13 +2705,51 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { return &CouldNotCompute; } +namespace { + // Helper class working with SCEVTraversal to figure out if a SCEV contains + // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne + // is set iff we find such a SCEVUnknown.
+ // + struct FindInvalidSCEVUnknown { + bool FindOne; + FindInvalidSCEVUnknown() { FindOne = false; } + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scUnknown: + if (!cast<SCEVUnknown>(S)->getValue()) + FindOne = true; + return false; + default: + return true; + } + } + bool isDone() const { return FindOne; } + }; +} + +bool ScalarEvolution::checkValidity(const SCEV *S) const { + FindInvalidSCEVUnknown F; + SCEVTraversal<FindInvalidSCEVUnknown> ST(F); + ST.visitAll(S); + + return !F.FindOne; +} + /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the /// expression and create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - ValueExprMapType::const_iterator I = ValueExprMap.find_as(V); - if (I != ValueExprMap.end()) return I->second; + ValueExprMapType::iterator I = ValueExprMap.find_as(V); + if (I != ValueExprMap.end()) { + const SCEV *S = I->second; + if (checkValidity(S)) + return S; + else + ValueExprMap.erase(I); + } const SCEV *S = createSCEV(V); // The process of creating a SCEV for V may have caused other SCEVs @@ -3060,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { Flags = setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) Flags = setFlags(Flags, SCEV::FlagNSW); - } else if (const GEPOperator *GEP = - dyn_cast<GEPOperator>(BEValueV)) { + } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { // If the increment is an inbounds GEP, then we know the address // space cannot be wrapped around. We cannot make any guarantee // about signed or unsigned overflow because pointers are // unsigned but we may have a negative index from the base - // pointer. - if (GEP->isInBounds()) + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds()) { Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + } else if (const SubOperator *OBO = + dyn_cast<SubOperator>(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); } const SCEV *StartVal = getSCEV(StartValueV); @@ -3136,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// operations. This allows them to be analyzed by regular SCEV code. /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. + if (!Base->getType()->getPointerElementType()->isSized()) + return getUnknown(GEP); // Don't blindly transfer the inbounds flag from the GEP instruction to the // Add expression, because the Instruction may be guarded by control flow // and the no-overflow bits may not be valid for the expression in any // context. - bool isInBounds = GEP->isInBounds(); + SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); - Value *Base = GEP->getOperand(0); - // Don't attempt to analyze GEPs over unsized objects. 
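The checkValidity helper above is an instance of a reusable pattern: a read-only expression-tree walk whose visitor can prune subtrees (follow returning false) and abort the whole traversal early (isDone). The same protocol on a toy node type, with an explicit worklist standing in for SCEVTraversal (all names here are illustrative):

#include <cstdio>
#include <vector>

// A toy expression node: a leaf is "invalid" when Value is null, which
// models a SCEVUnknown whose Value was dropped.
struct Node {
  const char *Value;
  std::vector<const Node *> Ops;
};

// The same visitor protocol as SCEVTraversal: follow() decides whether
// to descend, isDone() stops the walk as soon as the answer is known.
struct FindInvalidLeaf {
  bool FindOne;
  FindInvalidLeaf() : FindOne(false) {}
  bool follow(const Node *N) {
    if (N->Ops.empty()) { // a leaf: test it, nothing to descend into
      if (!N->Value)
        FindOne = true;
      return false;
    }
    return true; // an interior node: keep walking
  }
  bool isDone() const { return FindOne; }
};

template <typename Visitor>
static void visitAll(const Node *Root, Visitor &V) {
  std::vector<const Node *> Worklist(1, Root);
  while (!Worklist.empty() && !V.isDone()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (V.follow(N))
      Worklist.insert(Worklist.end(), N->Ops.begin(), N->Ops.end());
  }
}

int main() {
  Node Good, Bad, Root;
  Good.Value = "x";
  Bad.Value = 0; // the dropped-Value case checkValidity hunts for
  Root.Value = "+";
  Root.Ops.push_back(&Good);
  Root.Ops.push_back(&Bad);
  FindInvalidLeaf F;
  visitAll(&Root, F);
  std::printf("invalid leaf found: %d\n", F.FindOne); // prints 1
  return 0;
}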
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) - return getUnknown(GEP); const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), @@ -3158,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, - isInBounds ? SCEV::FlagNSW : - SCEV::FlagAnyWrap); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -3183,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, - isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); + return getAddExpr(BaseS, TotalOffset, Wrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3551,7 +3587,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (!U->getValue()->getType()->isIntegerTy() && !TD) return setSignedRange(U, ConservativeResult); unsigned NS = ComputeNumSignBits(U->getValue(), TD); - if (NS == 1) + if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -3751,7 +3787,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3769,7 +3805,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3947,7 +3983,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// depends on a NSW assumption, and we would only fall back to a conservative /// trip count in that case. unsigned ScalarEvolution:: -getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { +getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { const SCEVConstant *ExitCount = dyn_cast<SCEVConstant>(getBackedgeTakenCount(L)); if (!ExitCount) @@ -3976,7 +4012,7 @@ getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. 
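createNodeForGEP above folds a GEP into plain arithmetic: a struct operand contributes the member's offset, an array operand contributes the sign-extended index times the element size, and the accumulated sum is added to the base pointer. The same computation on a concrete C++ type:

#include <cassert>
#include <cstddef>

// What createNodeForGEP computes, on a concrete type: the byte offset
// of &A[i].y is i * sizeof(S) + offsetof(S, y), added to the base.
struct S {
  int x;
  double y;
};

static std::size_t gepOffset(std::size_t Index) {
  return Index * sizeof(S) + offsetof(S, y);
}

int main() {
  S A[4];
  char *Base = reinterpret_cast<char *>(A);
  assert(reinterpret_cast<char *>(&A[2].y) == Base + gepOffset(2));
  return 0;
}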
unsigned ScalarEvolution:: -getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) { +getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) { const SCEV *ExitCount = getBackedgeTakenCount(L); if (ExitCount == getCouldNotCompute()) return 1; @@ -4575,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_SLT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_SGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, true, IsSubExpr); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_ULT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: { // while (X < Y) + bool IsSigned = Cond == ICmpInst::ICMP_SLT; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_UGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, false, IsSubExpr); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: { // while (X > Y) + bool IsSigned = Cond == ICmpInst::ICMP_SGT; + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -5031,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if we've folded this expression at this loop before. - std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V]; - std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair = - Values.insert(std::make_pair(L, static_cast<const SCEV *>(0))); - if (!Pair.second) - return Pair.first->second ? Pair.first->second : V; - + SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second ? Values[u].second : V; + } + Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); - ValuesAtScopes[V][L] = C; + SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = C; + break; + } + } return C; } @@ -5078,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { case scAddExpr: { const SCEVAddExpr *SA = cast<SCEVAddExpr>(V); if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { - if (C->getType()->isPointerTy()) - C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + unsigned AS = PTy->getAddressSpace(); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + C = ConstantExpr::getBitCast(C, DestPtrTy); + } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); if (!C2) return 0; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + unsigned AS = C2->getType()->getPointerAddressSpace(); std::swap(C, C2); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); // The offsets have been converted to bytes. We can add bytes to an // i8* by GEP with the byte count in the first index. 
- C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + C = ConstantExpr::getBitCast(C, DestPtrTy); } // Don't bother trying to sum two pointers. We probably can't @@ -5097,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (C2->getType()->isPointerTy()) return 0; - if (C->getType()->isPointerTy()) { - if (cast<PointerType>(C->getType())->getElementType()->isStructTy()) + if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) { + if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); C = ConstantExpr::getGetElementPtr(C, C2); @@ -6295,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, return false; } -/// getBECount - Subtract the end and start values and divide by the step, -/// rounding up, to get the number of times the backedge is executed. Return -/// CouldNotCompute if an intermediate computation overflows. -const SCEV *ScalarEvolution::getBECount(const SCEV *Start, - const SCEV *End, - const SCEV *Step, - bool NoWrap) { - assert(!isKnownNegative(Step) && - "This code doesn't handle negative strides yet!"); - - Type *Ty = Start->getType(); - - // When Start == End, we have an exact BECount == 0. Short-circuit this case - // here because SCEV may not be able to determine that the unsigned division - // after rounding is zero. - if (Start == End) - return getConstant(Ty, 0); - - const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); - const SCEV *Diff = getMinusSCEV(End, Start); - const SCEV *RoundUp = getAddExpr(Step, NegOne); - - // Add an adjustment to the difference between End and Start so that - // the division will effectively round up. - const SCEV *Add = getAddExpr(Diff, RoundUp); - - if (!NoWrap) { - // Check Add for unsigned overflow. - // TODO: More sophisticated things could be done here. - Type *WideTy = IntegerType::get(getContext(), - getTypeSizeInBits(Ty) + 1); - const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); - const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); - const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); - if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) - return getCouldNotCompute(); +// Verify if a linear IV with positive stride can overflow when in a +// less-than comparison, knowing the invariant term of the comparison, the +// stride and the knowledge of NSW/NUW flags on the recurrence. +bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MaxRHS = getSignedRange(RHS).getSignedMax(); + APInt MaxValue = APInt::getSignedMaxValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! + return (MaxValue - MaxStrideMinusOne).slt(MaxRHS); } - return getUDivExpr(Add, Step); + APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); + APInt MaxValue = APInt::getMaxValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); +} + +// Verify if a linear IV with negative stride can overflow when in a +// greater-than comparison, knowing the invariant term of the comparison, +// the stride and the knowledge of NSW/NUW flags on the recurrence. +bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MinRHS = getSignedRange(RHS).getSignedMin(); + APInt MinValue = APInt::getSignedMinValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! + return (MinValue + MaxStrideMinusOne).sgt(MinRHS); + } + + APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); + APInt MinValue = APInt::getMinValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! + return (MinValue + MaxStrideMinusOne).ugt(MinRHS); +} + +// Compute the backedge taken count knowing the interval difference, the +// stride and presence of the equality in the comparison. +const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, + bool Equality) { + const SCEV *One = getConstant(Step->getType(), 1); + Delta = Equality ? getAddExpr(Delta, Step) + : getAddExpr(Delta, getMinusSCEV(Step, One)); + return getUDivExpr(Delta, Step); } /// HowManyLessThans - Return the number of times a backedge containing the @@ -6345,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, /// a subexpression that cannot overflow before evaluating true. ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, + const Loop *L, bool IsSigned, bool IsSubExpr) { - // Only handle: "ADDREC < LoopInvariant". - if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); + // We handle only IV < Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS); - if (!AddRec || AddRec->getLoop() != L) + const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); - // Check to see if we have a flag which makes analysis easy. - bool NoWrap = false; - if (!IsSubExpr) { - NoWrap = AddRec->getNoWrapFlags( - (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW)) - | SCEV::FlagNW)); - } - if (AddRec->isAffine()) { - unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); - const SCEV *Step = AddRec->getStepRecurrence(*this); + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); - if (Step->isZero()) - return getCouldNotCompute(); - if (Step->isOne()) { - // With unit stride, the iteration never steps past the limit value. - } else if (isKnownPositive(Step)) { - // Test whether a positive iteration can step past the limit - // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even - // though the value after a wrap is undefined, it's not undefined - // behavior, so if wrap does occur, the loop could either terminate or - // loop infinitely, but in either case, the loop is guaranteed to - // iterate at least until the iteration where the wrapping occurs. - const SCEV *One = getConstant(Step->getType(), 1); - if (isSigned) { - APInt Max = APInt::getSignedMaxValue(BitWidth); - if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) - .slt(getSignedRange(RHS).getSignedMax())) - return getCouldNotCompute(); - } else { - APInt Max = APInt::getMaxValue(BitWidth); - if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) - .ult(getUnsignedRange(RHS).getUnsignedMax())) - return getCouldNotCompute(); - } - } else - // TODO: Handle negative strides here and below. - return getCouldNotCompute(); + const SCEV *Stride = IV->getStepRecurrence(*this); - // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant - // m. So, we count the number of iterations in which {n,+,s} < m is true. - // Note that we cannot simply return max(m-n,0)/s because it's not safe to - // treat m-n as signed nor unsigned due to overflow possibility. - - // First, we get the value of the LHS in the first iteration: n - const SCEV *Start = AddRec->getOperand(0); - - // Determine the minimum constant start value. - const SCEV *MinStart = getConstant(isSigned ? - getSignedRange(Start).getSignedMin() : - getUnsignedRange(Start).getUnsignedMin()); - - // If we know that the condition is true in order to enter the loop, - // then we know that it will run exactly (m-n)/s times. Otherwise, we - // only know that it will execute (max(m,n)-n)/s times. In both cases, - // the division must round up. - const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, - getMinusSCEV(Start, Step), RHS)) - End = isSigned ? getSMaxExpr(RHS, Start) - : getUMaxExpr(RHS, Start); - - // Determine the maximum constant end value. - const SCEV *MaxEnd = getConstant(isSigned ? - getSignedRange(End).getSignedMax() : - getUnsignedRange(End).getUnsignedMax()); - - // If MaxEnd is within a step of the maximum integer value in its type, - // adjust it down to the minimum value which would produce the same effect. - // This allows the subsequent ceiling division of (N+(step-1))/step to - // compute the correct value. - const SCEV *StepMinusOne = getMinusSCEV(Step, - getConstant(Step->getType(), 1)); - MaxEnd = isSigned ? - getSMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), - StepMinusOne)) : - getUMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), - StepMinusOne)); - - // Finally, we subtract these two values and divide, rounding up, to get - // the number of times the backedge is executed. - const SCEV *BECount = getBECount(Start, End, Step, NoWrap); - - // The maximum backedge count is similar, except using the minimum start - // value and the maximum end value. - // If we already have an exact constant BECount, use it instead. - const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount - : getBECount(MinStart, MaxEnd, Step, NoWrap); - - // If the stride is nonconstant, and NoWrap == true, then - // getBECount(MinStart, MaxEnd) may not compute. This would result in an - // exact BECount and invalid MaxBECount, which should be avoided to catch - // more optimization opportunities. 
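The replacement trip-count logic above separates two concerns: computeBECount does the arithmetic, ceil((End - Start) / Stride) for a strict bound, while doesIVOverflowOnLT first rules out the strides for which that arithmetic could wrap. An 8-bit worked example of both, with plain integers standing in for APInt:

#include <cassert>
#include <cstdint>

// computeBECount for a strict bound (exit test in the header):
// ceil((End - Start) / Stride), computed as (Delta + Stride - 1) / Stride
// in unsigned arithmetic. The addition can wrap at the type's width;
// ruling that out beforehand is doesIVOverflowOnLT's whole job.
static uint32_t beCount(uint32_t Start, uint32_t End, uint32_t Stride) {
  uint32_t Delta = End - Start;
  return (Delta + Stride - 1) / Stride;
}

// The rearranged guard at 8 bits: the IV can step past both the bound
// and the type's maximum iff maxRHS > MaxValue - (maxStride - 1); the
// right-hand side cannot underflow because the stride is at least 1.
static bool mayOverflowOnLT(uint8_t MaxRHS, uint8_t MaxStride) {
  const unsigned MaxValue = 0xFF;
  return MaxRHS > MaxValue - (MaxStride - 1u);
}

int main() {
  // i = 0, 3, 6, 9 all pass i < 10: four header tests succeed, so the
  // backedge runs four times before the fifth test exits the loop.
  assert(beCount(0, 10, 3) == 4);

  // 8-bit IV, i < 250, step 10: from i = 249 the next value wraps to 3
  // and the formula above would lie, so the guard has to fire.
  assert(mayOverflowOnLT(250, 10));
  assert(!mayOverflowOnLT(200, 10)); // 200 <= 246: no wrap possible
  return 0;
}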
- if (isa<SCEVCouldNotCompute>(MaxBECount)) - MaxBECount = BECount; - - return ExitLimit(BECount, MaxBECount); - } + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); - return getCouldNotCompute(); + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT + : ICmpInst::ICMP_ULT; + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + End = IsSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + + APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() + : getUnsignedRange(Start).getUnsignedMin(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) + : APInt::getMaxValue(BitWidth) - (MinStride - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS. This is safe because in the other case (End - Start) + // is zero, leading to a zero maximum backedge taken count. + APInt MaxEnd = + IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) + : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa<SCEVConstant>(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), + getConstant(MinStride), false); + + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); +} + +ScalarEvolution::ExitLimit +ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool IsSigned, + bool IsSubExpr) { + // We handle only IV > Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); + + const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) + return getCouldNotCompute(); + + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); + + const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); + + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); + + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT + : ICmpInst::ICMP_UGT; + + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) + End = IsSigned ? 
getSMinExpr(RHS, Start) + : getUMinExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); + + APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() + : getUnsignedRange(Start).getUnsignedMax(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) + : APInt::getMinValue(BitWidth) + (MinStride - 1); + + // Although End can be a MIN expression we estimate MinEnd considering only + // the case End = RHS. This is safe because in the other case (Start - End) + // is zero, leading to a zero maximum backedge taken count. + APInt MinEnd = + IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) + : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa<SCEVConstant>(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), + getConstant(MinStride), false); + + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -6586,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::srem(A, B); +} + +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +namespace { +struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> { +public: + // Pattern match Step into Start. When Step is a multiply expression, find + // the largest subexpression of Step that appears in Start. When Start is an + // add expression, try to match Step in the subexpressions of Start, non + // matching subexpressions are returned under Remainder. 
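The gcd/srem/sdiv helpers above begin by widening the narrower operand so both APInts share a bit width, zero-extending the already-absolute gcd magnitudes and sign-extending for the signed operations; APIntOps::GreatestCommonDivisor then runs Euclid. The same shape with fixed-width integers (a sketch; the widths are already equal, so only the absolute-value step remains):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Euclid on magnitudes, mirroring the gcd() helper above: drop the sign
// first, then reduce. (With APInt the narrower operand would also be
// zero-extended here; fixed 64-bit integers are already width-matched.)
static uint64_t gcdMagnitude(int64_t A, int64_t B) {
  uint64_t X = static_cast<uint64_t>(std::llabs(A));
  uint64_t Y = static_cast<uint64_t>(std::llabs(B));
  while (Y != 0) {
    uint64_t R = X % Y;
    X = Y;
    Y = R;
  }
  return X;
}

int main() {
  assert(gcdMagnitude(-4, 6) == 2); // sign dropped before reducing
  assert(gcdMagnitude(12, 18) == 6);
  assert(gcdMagnitude(7, 0) == 7);  // gcd(x, 0) == |x|
  return 0;
}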
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step, const SCEV **Remainder) { + assert(Remainder && "Remainder should not be NULL"); + SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); + const SCEV *Res = R.visit(Start); + *Remainder = R.Remainder; + return Res; + } + + SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) + : SE(S), GCD(G), Remainder(R) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant || Constant == Zero) + return GCD; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) { + const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); + if (Res != One) + return Res; + + Remainder = SE.getConstant(srem(Constant, CGCD)); + Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder)); + Res = SE.getConstant(gcd(Constant, CGCD)); + return Res; + } + + // When GCD is not a constant, it could be that the GCD is an Add, Mul, + // AddRec, etc., in which case we want to find out how many times the + // Constant divides the GCD: we then return that as the new GCD. + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + + if (Res == One || Rem != Zero) { + Remainder = Constant; + return One; + } + + assert(isa<SCEVConstant>(Res) && "Res should be a constant"); + Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res))); + return Res; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + + // FIXME: There may be ambiguous situations: for instance, + // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). + // The order in which the AddExpr is traversed computes a different GCD + // and Remainder. + if (Res != One) + GCD = Res; + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + } + + return GCD; + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (Expr->getOperand(i) == GCD) + return GCD; + } + + // If we have not returned yet, it means that GCD is not part of Expr. + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem != Zero) + // GCD does not divide Expr->getOperand(i). 
+ continue; + + if (Res == GCD) + return GCD; + PartialGCD = SE.getMulExpr(PartialGCD, Res); + if (PartialGCD == GCD) + return GCD; + } + + if (PartialGCD != One) + return PartialGCD; + + Remainder = Expr; + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD); + if (!Mul) + return PartialGCD; + + // When the GCD is a multiply expression, try to decompose it: + // this occurs when Step does not divide the Start expression + // as in: {(-4 + (3 * %m)),+,(2 * %m)} + for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); + if (Rem == Zero) { + Remainder = Rem; + return Res; + } + } + + return PartialGCD; + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return GCD; + + if (!Expr->isAffine()) { + Remainder = Expr; + return GCD; + } + + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + + Rem = Zero; + Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); + if (Rem != Zero) { + Remainder = Expr; + return GCD; + } + + return Res; + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return One; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Remainder, *Zero, *One; +}; + +struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> { +public: + // Remove from Start all multiples of Step. + static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step) { + SCEVDivision D(SE, Step); + const SCEV *Rem = D.Zero; + (void)Rem; + // The division is guaranteed to succeed: Step should divide Start with no + // remainder. 
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && + "Step should divide Start with no remainder."); + return D.visit(Start); + } + + SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant) + return One; + + if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) + return SE.getConstant(sdiv(Constant, CGCD)); + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return One; + + SmallVector<const SCEV *, 2> Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + + if (Operands.size() == 1) + return Operands[0]; + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return One; + + bool FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + + SmallVector<const SCEV *, 2> Operands; + if (FoundGCDTerm) { + FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (FoundGCDTerm) + Operands.push_back(Expr->getOperand(i)); + else if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + else + Operands.push_back(Expr->getOperand(i)); + } + } else { + FoundGCDTerm = false; + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (PartialGCD == GCD) { + Operands.push_back(Expr->getOperand(i)); + continue; + } + + const SCEV *Rem = Zero; + const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem == Zero) { + PartialGCD = SE.getMulExpr(PartialGCD, Res); + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + } else { + Operands.push_back(Expr->getOperand(i)); + } + } + } + + if (Operands.size() == 1) + return Operands[0]; + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return One; + + assert(Expr->isAffine() && "Expr should be affine"); + + const SCEV *Start = divide(SE, Expr->getStart(), GCD); + const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + + return SE.getAddRecExpr(Start, Step, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Zero, *One; +}; +} + +/// Splits the SCEV into two vectors of SCEVs representing the subscripts and +/// sizes of an array access. 
Returns the remainder of the delinearization that +/// is the offset start of the array. The SCEV->delinearize algorithm computes +/// the multiples of SCEV coefficients: that is a pattern matching of +/// subexpressions in the stride and base of a SCEV, corresponding to the +/// computation of a GCD (greatest common divisor) of base and stride. When +/// SCEV->delinearize fails, it returns the SCEV unchanged. +/// +/// For example: when analyzing the memory access A[i][j][k] in this loop nest +/// +/// void foo(long n, long m, long o, double A[n][m][o]) { +/// +/// for (long i = 0; i < n; i++) +/// for (long j = 0; j < m; j++) +/// for (long k = 0; k < o; k++) +/// A[i][j][k] = 1.0; +/// } +/// +/// the delinearization input is the following AddRec SCEV: +/// +/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> +/// +/// From this SCEV, we are able to say that the base offset of the access is %A +/// because it appears as an offset that is not divisible by any of the +/// strides in the loops: +/// +/// CHECK: Base offset: %A +/// +/// and then SCEV->delinearize determines the size of some of the dimensions of +/// the array, as these are the multiples that make up the strides: +/// +/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. +/// +/// Note that the outermost dimension remains of UnknownSize because there are +/// no strides that would help identify the size of the last dimension: when +/// the array has been statically allocated, one could compute the size of that +/// dimension by dividing the overall size of the array by the size of the known +/// dimensions: %m * %o * 8. +/// +/// Finally delinearize provides the access functions for the array reference +/// that corresponds to A[i][j][k] in the above C testcase: +/// +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// +/// The testcases check the output of a function pass, +/// DelinearizationPass, which walks through all loads and stores of a function, +/// asks for the SCEV of each memory access with respect to all enclosing +/// loops, calls SCEV->delinearize on it, and prints the results. + +const SCEV * +SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) const { + // Early exit in case this SCEV is not an affine multivariate function. + if (!this->isAffine()) + return this; + + const SCEV *Start = this->getStart(); + const SCEV *Step = this->getStepRecurrence(SE); + + // Build the SCEV representation of the canonical induction variable in the + // loop of this SCEV. + const SCEV *Zero = SE.getConstant(this->getType(), 0); + const SCEV *One = SE.getConstant(this->getType(), 1); + const SCEV *IV = + SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); + + DEBUG(dbgs() << "(delinearize: " << *this << "\n"); + + // Currently we fail to delinearize when the stride of this SCEV is 1. We + // could decide to not fail in this case: we could just return 1 for the size + // of the subscript, and this same SCEV for the access function. + if (Step == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + 
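
Before the GCD step below, a toy model in plain integers (hypothetical helper, not part of the patch) of what the recursion ultimately recovers for the A[n][m][o] example above, once the dimension sizes m, o and the element size have been identified as GCDs of the strides:

#include <cstdint>
#include <vector>

// Recover {i, j, k} from Offset = ((i*m + j)*o + k) * ElemSize.
static std::vector<uint64_t> delinearizeOffset(uint64_t Offset,
                                               uint64_t ElemSize,
                                               uint64_t m, uint64_t o) {
  uint64_t Linear = Offset / ElemSize; // ElemSize is the innermost GCD
  uint64_t k = Linear % o;             // innermost subscript
  uint64_t j = (Linear / o) % m;
  uint64_t i = Linear / (o * m);       // outermost dimension stays unbounded
  std::vector<uint64_t> Subscripts;
  Subscripts.push_back(i);
  Subscripts.push_back(j);
  Subscripts.push_back(k);
  return Subscripts;
}

The SCEV version performs the same peeling symbolically, one dimension per recursive call, with findGCD supplying the divisor at each level.
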
+ // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. + const SCEV *Remainder = NULL; + const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); + + DEBUG(dbgs() << "GCD: " << *GCD << "\n"); + DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); + + // Same remark as above: we currently fail the delinearization, although we + // can very well handle this special case. + if (GCD == One) { + DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n"); + return this; + } + + // As findGCD computed Remainder, GCD divides "Start - Remainder." The + // Quotient is then this SCEV without Remainder, scaled down by the GCD. The + // Quotient is what will be used in the next subscript delinearization. + const SCEV *Quotient = + SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); + DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); + + const SCEV *Rem; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient)) + // Recursively call delinearize on the Quotient until there are no more + // multiples that can be recognized. + Rem = AR->delinearize(SE, Subscripts, Sizes); + else + Rem = Quotient; + + // Scale up the canonical induction variable IV by whatever remains from the + // Step after division by the GCD: the GCD is the size of the sub-array. + if (Step != GCD) { + Step = SCEVDivision::divide(SE, Step, GCD); + IV = SE.getMulExpr(IV, Step); + } + // The access function in the current subscript is computed as the canonical + // induction variable IV (potentially scaled up by the step) and offset by + // Rem, the offset of delinearization in the sub-array. + const SCEV *Index = SE.getAddExpr(IV, Rem); + + // Record the access function and the size of the current subscript. + Subscripts.push_back(Index); + Sizes.push_back(GCD); + +#ifndef NDEBUG + int Size = Sizes.size(); + DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); + DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); + for (int i = 0; i < Size - 1; i++) + DEBUG(dbgs() << "[" << *Sizes[i] << "]"); + DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); + + DEBUG(dbgs() << "ArrayRef"); + for (int i = 0; i < Size; i++) + DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); + DEBUG(dbgs() << "\n)\n"); +#endif + + return Remainder; +} //===----------------------------------------------------------------------===// // SCEVCallbackVH Class Implementation @@ -6642,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), FirstUnknown(0) { + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -6780,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const { ScalarEvolution::LoopDisposition ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { - std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S]; - std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair = - Values.insert(std::make_pair(L, LoopVariant)); - if (!Pair.second) - return Pair.first->second; - + SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second; + } + Values.push_back(std::make_pair(L, LoopVariant)); LoopDisposition D = computeLoopDisposition(S, L); - return LoopDispositions[S][L] = D; + 
SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = D; + break; + } + } + return D; } ScalarEvolution::LoopDisposition @@ -6879,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { ScalarEvolution::BlockDisposition ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { - std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S]; - std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool> - Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock)); - if (!Pair.second) - return Pair.first->second; - + SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == BB) + return Values[u].second; + } + Values.push_back(std::make_pair(BB, DoesNotDominateBlock)); BlockDisposition D = computeBlockDisposition(S, BB); - return BlockDispositions[S][BB] = D; + SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == BB) { + Values2[u - 1].second = D; + break; + } + } + return D; } ScalarEvolution::BlockDisposition diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index fcd7ce2..86a557b 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, // If we haven't found this binop, insert it. Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); - BO->setDebugLoc(SaveInsertPt->getDebugLoc()); + BO->setDebugLoc(Loc); rememberInstruction(BO); - // Restore the original insert point. 
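
Stepping back to the getLoopDisposition/getBlockDisposition rewrite above before continuing with the expander hunks: the change swaps a per-SCEV std::map for a small vector scanned linearly, and re-fetches that vector after the compute step. A reduced sketch of the pattern with toy key types (plain ints standing in for const SCEV*/const Loop*, and a stub compute; illustrative only):

#include <map>
#include <utility>
#include <vector>

enum Disposition { LoopVariant, LoopInvariant };

// Stub for the real recursive computation.
static Disposition compute(int S, int L) { (void)S; (void)L; return LoopInvariant; }

static std::map<int, std::vector<std::pair<int, Disposition> > > Cache;

Disposition getDisposition(int S, int L) {
  std::vector<std::pair<int, Disposition> > &Values = Cache[S];
  for (unsigned u = 0; u < Values.size(); u++) // linear scan: most SCEVs are
    if (Values[u].first == L)                  // queried for one or two loops
      return Values[u].second;
  Values.push_back(std::make_pair(L, LoopVariant)); // provisional entry breaks cycles
  Disposition D = compute(S, L);                    // may recurse and grow the cache,
  std::vector<std::pair<int, Disposition> > &Values2 = Cache[S]; // so re-fetch the
  for (unsigned u = Values2.size(); u > 0; u--)                  // vector before writing
    if (Values2[u - 1].first == L) { Values2[u - 1].second = D; break; }
  return D;
}

For the one-or-two-entry sets seen per SCEV, the linear scan is cheaper than node-based map lookups, which is presumably why the real code also pre-sizes the enclosing DenseMaps to 64 buckets.
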
- if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return BO; } @@ -294,8 +291,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - // FIXME: can use A->getNoWrapFlags(FlagNW) - S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); return true; } @@ -348,8 +345,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -407,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); + Type *IntPtrTy = SE.TD + ? SE.TD->getIntPtrType(PTy) + : Type::getInt64Ty(PTy->getContext()); + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of @@ -417,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector<const SCEV *, 8> ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); if (!ElSize->isZero()) { SmallVector<const SCEV *, 8> NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { @@ -549,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -566,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); rememberInstruction(GEP); - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return GEP; } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPoint SaveInsertPt = Builder.saveIP(); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -611,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, rememberInstruction(GEP); // Restore the original insert point. 
- if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Builder.restoreIP(SaveInsertPt); return expand(SE.getAddExpr(Ops)); } @@ -846,8 +839,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); } if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -1078,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Another AddRec may need to be recursively expanded below. For example, if // this AddRec is quadratic, the StepV may itself be an AddRec in this @@ -1137,14 +1128,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); - + if (isa<OverflowingBinaryOperator>(IncV)) { + if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); + if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); + } PN->addIncoming(IncV, Pred); } - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - // After expanding subexpressions, restore the PostIncLoops set so the caller // can ensure that IVIncrement dominates the current uses. PostIncLoops = SavedPostIncLoops; @@ -1180,8 +1172,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Normalized = cast<SCEVAddRecExpr>( SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), Normalized->getLoop(), - // FIXME: Normalized->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Strip off any non-loop-dominating component from the addrec step. @@ -1191,11 +1182,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); Normalized = - cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, - Normalized->getLoop(), - // FIXME: Normalized - // ->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + cast<SCEVAddRecExpr>(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Expand the core addrec. If we need post-loop scaling, force it to @@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); - // Expand the step somewhere that dominates the loop header. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); - // Restore the insertion point to the place where the caller has - // determined dominates all uses. - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. 
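
The hunk continuing below is one of many in this file that replace the manual save/SetInsertPoint/restore dance with an RAII guard. A minimal sketch of the idiom, assuming BuilderType is an IRBuilder<> as in the expander (illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// InsertPointGuard saves the insertion block and iterator on construction
// and restores them when it goes out of scope, so every return path --
// including early exits -- puts the builder back where the caller left it.
static Value *emitInPreheader(IRBuilder<> &Builder, BasicBlock *Preheader,
                              Value *LHS, Value *RHS) {
  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(Preheader->getTerminator());
  return Builder.CreateAdd(LHS, RHS, "hoisted");
} // Guard's destructor restores the caller's insertion point here
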
+ BuilderType::InsertPointGuard Guard(Builder); + StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } } // Re-apply any non-loop-dominating scale. if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); @@ -1288,18 +1277,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), - // FIXME: S->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast<Instruction>(V))); + BuilderType::InsertPointGuard Guard(Builder); while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || isa<LandingPadInst>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1307,8 +1293,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - // FIXME: can use S->getNoWrapFlags() - const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. @@ -1343,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Header->begin()); rememberInstruction(CanonicalIV); + SmallSet<BasicBlock *, 4> PredSeen; Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP)) + continue; + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. @@ -1528,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) { if (I != InsertedExpressions.end()) return I->second; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. @@ -1542,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) { // a postinc expansion, it could be reused by a non postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; - - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1554,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedValues.insert(I); } -void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - Builder.SetInsertPoint(BB, I); -} - /// getOrInsertCanonicalInductionVariable - This method returns the /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). 
A canonical induction variable @@ -1573,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); // Emit code for it. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1725,28 +1705,43 @@ namespace { // Currently, we only allow division by a nonzero constant here. If this is // inadequate, we could easily allow division by SCEVUnknown by using // ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. struct SCEVFindUnsafe { + ScalarEvolution &SE; bool IsUnsafe; - SCEVFindUnsafe(): IsUnsafe(false) {} + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} bool follow(const SCEV *S) { - const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S); - if (!D) - return true; - const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); - if (SC && !SC->getValue()->isZero()) - return true; - IsUnsafe = true; - return false; + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; } bool isDone() const { return IsUnsafe; } }; } namespace llvm { -bool isSafeToExpand(const SCEV *S) { - SCEVFindUnsafe Search; +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); visitAll(S, Search); return !Search.IsUnsafe; } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index dd2ed4f..f110616 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { case NormalizeAutodetect: - if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); + // Normalize this SCEV by subtracting the expression for the final step. + // We only allow affine AddRecs to be normalized, otherwise we would not + // be able to correctly denormalize. + // e.g. 
{1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} + // Normalized form: {-2,+,1,+,2} + // Denormalized form: {1,+,3,+,2} + // + // However, denormalization would use a different step expression than + // normalization (see getPostIncExpr), generating the wrong final + // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} + if (AR->isAffine() && + IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); Loops.insert(L); } #if 0 diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 64f8e96..0353295 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -88,10 +88,19 @@ unsigned TargetTransformInfo::getUserCost(const User *U) const { return PrevTTI->getUserCost(U); } +bool TargetTransformInfo::hasBranchDivergence() const { + return PrevTTI->hasBranchDivergence(); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return PrevTTI->isLoweredToCall(F); } +void TargetTransformInfo::getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const { + PrevTTI->getUnrollingPreferences(L, UP); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return PrevTTI->isLegalAddImmediate(Imm); } @@ -108,6 +117,14 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale); } +int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); + } + bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return PrevTTI->isTruncateFree(Ty1, Ty2); } @@ -133,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return PrevTTI->getPopcntSupport(IntTyWidthInBit); } +bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { + return PrevTTI->haveFastSqrt(Ty); +} + unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return PrevTTI->getIntImmCost(Imm, Ty); } @@ -198,8 +219,14 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return PrevTTI->getNumberOfParts(Tp); } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { - return PrevTTI->getAddressComputationCost(Tp); +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, + bool IsComplex) const { + return PrevTTI->getAddressComputationCost(Tp, IsComplex); +} + +unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise); } namespace { @@ -252,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { // Otherwise, the default basic cost is used. return TCC_Basic; - case Instruction::IntToPtr: + case Instruction::IntToPtr: { + if (!DL) + return TCC_Basic; + // An inttoptr cast is free so long as the input is a legal integer type // which doesn't contain values outside the range of a pointer. - if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && - OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + unsigned OpSize = OpTy->getScalarSizeInBits(); + if (DL->isLegalInteger(OpSize) && + OpSize <= DL->getPointerTypeSizeInBits(Ty)) return TCC_Free;
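
Back to the ScalarEvolutionNormalization hunk above: a standalone numeric check (illustrative only) of the identity in its comment, evaluating a chain of recurrences {C0,+,C1,+,C2} at iteration n as C0 + C1*n + C2*n*(n-1)/2:

#include <cstdio>

// Evaluate {C0,+,C1,+,C2}(n); a two-deep chain of recurrences is the
// binomial expansion C0*C(n,0) + C1*C(n,1) + C2*C(n,2).
static long evalAddRec(long C0, long C1, long C2, long n) {
  return C0 + C1 * n + C2 * n * (n - 1) / 2;
}

int main() {
  for (long n = 0; n < 4; ++n) {
    long Orig = evalAddRec(1, 3, 2, n);                       // {1,+,3,+,2}
    long Norm = evalAddRec(-2, 1, 2, n);                      // {-2,+,1,+,2}
    long Step = evalAddRec(3, 2, 0, n);                       // {3,+,2}
    std::printf("n=%ld: %ld == %ld\n", n, Orig, Norm + Step); // always equal
  }
  return 0;
}

Denormalizing with the normalized step {1,+,2} instead of {3,+,2} lands on {-1,+,3,+,2}, which is exactly the mismatch the affine-only restriction avoids.
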
// Otherwise it's not a no-op. return TCC_Basic; + } + case Instruction::PtrToInt: { + if (!DL) + return TCC_Basic; - case Instruction::PtrToInt: // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. - if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && - Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + unsigned DestSize = Ty->getScalarSizeInBits(); + if (DL->isLegalInteger(DestSize) && + DestSize >= DL->getPointerTypeSizeInBits(OpTy)) return TCC_Free; // Otherwise it's not a no-op. return TCC_Basic; - + } case Instruction::Trunc: // trunc to a native type is free (assuming the target has compare and // shift-right of the same width). @@ -411,6 +446,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { U->getOperand(0)->getType() : 0); } + bool hasBranchDivergence() const { return false; } + bool isLoweredToCall(const Function *F) const { // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely @@ -442,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return true; } + void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + bool isLegalAddImmediate(int64_t Imm) const { return false; } @@ -457,6 +496,15 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return !BaseGV && BaseOffset == 0 && Scale <= 1; } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that all legal addressing modes are free. + if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) + return 0; + return -1; + } + + bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } @@ -481,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return PSK_Software; } + bool haveFastSqrt(Type *Ty) const { + return false; + } + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { return 1; } @@ -542,9 +594,13 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return 0; } - unsigned getAddressComputationCost(Type *Tp) const { + unsigned getAddressComputationCost(Type *Tp, bool) const { return 0; } + + unsigned getReductionCost(unsigned, Type *, bool) const { + return 1; + } }; } // end anonymous namespace diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index bbf3c3a..6791d4b 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -16,7 +16,12 @@ // typical C/C++ TBAA, but it can also be used to implement custom alias // analysis behavior for other languages. // -// The current metadata format is very simple. TBAA MDNodes have up to +// We now support two metadata formats: scalar TBAA and struct-path +// aware TBAA. After all test cases are upgraded to use struct-path aware +// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA +// can be dropped. +// +// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to // three fields, e.g.: // !0 = metadata !{ metadata !"an example type tree" } // !1 = metadata !{ metadata !"int", metadata !0 } @@ -40,6 +45,65 @@ // should return true; see // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). // +// With struct-path aware TBAA, the MDNodes attached to an instruction using +// "!tbaa" are called path tag nodes. +// +// The path tag node has 4 fields with the last field being optional. 
+// +// The first field is the base type node, it can be a struct type node +// or a scalar type node. The second field is the access type node, it +// must be a scalar type node. The third field is the offset into the base type. +// The last field has the same meaning as the last field of our scalar TBAA: +// it's an integer which, if equal to 1, indicates that the access is "constant". +// +// The struct type node has a name and a list of pairs, one pair for each member +// of the struct. The first element of each pair is a type node (a struct type +// node or a scalar type node), specifying the type of the member, the second +// element of each pair is the offset of the member. +// +// Given this example +// typedef struct { +// short s; +// } A; +// typedef struct { +// uint16_t s; +// A a; +// } B; +// +// For an access to B.a.s, we attach !5 (a path tag node) to the load/store +// instruction. The base type is !4 (struct B), the access type is !2 (scalar +// type short) and the offset is 4. +// +// !0 = metadata !{metadata !"Simple C/C++ TBAA"} +// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node +// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node +// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node +// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4} +// // Struct type node +// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node +// +// The struct type nodes and the scalar type nodes form a type DAG. +// Root (!0) +// char (!1) -- edge to Root +// short (!2) -- edge to char +// A (!3) -- edge with offset 0 to short +// B (!4) -- edge with offset 0 to short and edge with offset 4 to A +// +// To check if two tags (tagX and tagY) can alias, we start from the base type +// of tagX, follow the edge with the correct offset in the type DAG and adjust +// the offset until we reach the base type of tagY or until we reach the Root +// node. +// If we reach the base type of tagY, compare the adjusted offset with the +// offset of tagY, return Alias if the offsets are the same, return NoAlias +// otherwise. +// If we reach the Root node, perform the above starting from base type of tagY +// to see if we reach base type of tagX. +// +// If they have different roots, they're part of different potentially +// unrelated type systems, so we return Alias to be conservative. +// If neither node is an ancestor of the other and they have the same root, +// then we say NoAlias. +// // TODO: The current metadata format doesn't support struct // fields. For example: // struct X { @@ -71,7 +135,6 @@ using namespace llvm; // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); -static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false)); namespace { /// TBAANode - This is a simple wrapper around an MDNode which provides a @@ -168,8 +231,12 @@ namespace { if (Node->getNumOperands() < 2) return TBAAStructTypeNode(); - // Special handling for a scalar type node. + // Fast path for a scalar type node and a struct type node with a single + // field. if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 ? 
0 : + cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); + Offset -= Cur; MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); if (!P) return TBAAStructTypeNode(); @@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); } +/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat +/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA +/// format. +static bool isStructPathTBAA(const MDNode *MD) { + // Anonymous TBAA root starts with a MDNode and dragonegg uses it as + // a TBAA tag. + return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; +} + /// Aliases - Test whether the type represented by A may alias the /// type represented by B. bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { - if (EnableStructPathTBAA) + if (isStructPathTBAA(A)) return PathAliases(A, B); // Keep track of the root node for A and B. @@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || - (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) return true; return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); @@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) - if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || - (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) Min = OnlyReadsMemory; return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); @@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); } +bool MDNode::isTBAAVtableAccess() const { + if (!isStructPathTBAA(this)) { + if (getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; + } + + // For struct-path aware TBAA, we use the access type of the tag. + if (getNumOperands() < 2) return false; + MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); + if (!Tag) return false; + if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; +} + MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) return NULL; @@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return A; // For struct-path aware TBAA, we use the access type of the tag. 
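
A toy model (plain structs standing in for the MDNode wrappers, hypothetical names, not part of the patch) of the path walk the format comment above describes -- step from a tag's base type into the member containing the current offset, rebasing the offset, until we meet the other tag's base type or run out of edges:

#include <cstdint>
#include <utility>
#include <vector>

struct TypeNode {
  // (Child, ChildOffset) pairs sorted by offset, e.g. from the comment above
  // B = {(short, 0), (A, 4)}; scalar nodes have a single parent edge at 0.
  std::vector<std::pair<const TypeNode *, uint64_t> > Members;
};

// May a tag (From, Offset) reach (Target, TargetOffset) along offset edges?
static bool reachesWithOffset(const TypeNode *From, uint64_t Offset,
                              const TypeNode *Target, uint64_t TargetOffset) {
  while (From != Target) {
    if (From->Members.empty())
      return false;                 // hit a leaf without meeting Target
    unsigned Idx = 0;               // pick the member whose range holds Offset
    for (unsigned i = 0; i < From->Members.size(); ++i)
      if (From->Members[i].second <= Offset)
        Idx = i;
    Offset -= From->Members[Idx].second; // rebase into the member's type
    From = From->Members[Idx].first;
  }
  return Offset == TargetOffset;    // same field => the accesses may alias
}

The real PathAliases runs this walk in both directions and conservatively answers MayAlias when the two tags live under different roots.
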
- if (EnableStructPathTBAA) { + bool StructPath = isStructPathTBAA(A); + if (StructPath) { A = cast_or_null<MDNode>(A->getOperand(1)); if (!A) return 0; B = cast_or_null<MDNode>(B->getOperand(1)); @@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { --IA; --IB; } - if (!EnableStructPathTBAA) + if (!StructPath) return Ret; if (!Ret) diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 45dcc5e..e39ee62 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -39,8 +40,8 @@ const unsigned MaxDepth = 6; static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - assert(isa<PointerType>(Ty) && "Expected a pointer type!"); - return TD ? TD->getPointerSizeInBits() : 0; + + return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; } static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, @@ -290,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + countTrailingZeros(Align)); else KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -321,7 +322,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); return; } @@ -613,7 +614,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { @@ -629,12 +630,22 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) return; - const StructLayout *SL = TD->getStructLayout(STy); + if (!TD) + return; + + // Handle case when index is vector zeroinitializer + Constant *CIndex = cast<Constant>(Index); + if (CIndex->isZeroValue()) + continue; + + if (CIndex->getType()->isVectorTy()) + Index = CIndex->getSplatValue(); + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); - TrailZ = std::min(TrailZ, - CountTrailingZeros_64(Offset)); + TrailZ = std::min<unsigned>(TrailZ, + countTrailingZeros(Offset)); } else { // Handle array index arithmetic. 
Type *IndexedTy = GTI.getIndexedType(); @@ -644,7 +655,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, - unsigned(CountTrailingZeros_64(TypeSize) + + unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); } } @@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } - case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); break; @@ -855,6 +865,37 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { return false; } + // Adding a power-of-two or zero to the same power-of-two or zero yields + // either the original power-of-two, a larger power-of-two or zero. + if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V); + if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { + if (match(X, m_And(m_Specific(Y), m_Value())) || + match(X, m_And(m_Value(), m_Specific(Y)))) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth)) + return true; + if (match(Y, m_And(m_Specific(X), m_Value())) || + match(Y, m_And(m_Value(), m_Specific(X)))) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth)) + return true; + + unsigned BitWidth = V->getType()->getScalarSizeInBits(); + APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); + ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + + APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); + ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + // If i8 V is a power of two or zero: + // ZeroBits: 1 1 1 0 1 1 1 1 + // ~ZeroBits: 0 0 0 1 0 0 0 0 + if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + // If OrZero isn't set, we cannot give back a zero result. + // Make sure either the LHS or RHS has a bit set. + if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + return true; + } + } + // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). @@ -1509,7 +1550,7 @@ Value *llvm::isBytewiseValue(Value *V) { // struct. To is the result struct built so far, new insertvalue instructions // build on that. static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, - SmallVector<unsigned, 10> &Idxs, + SmallVectorImpl<unsigned> &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType); @@ -1673,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout *TD) { + const DataLayout *DL) { // Without DataLayout, conservatively assume 64-bit offsets, which is // the widest we support. - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(Ptr->getType()) : 64; APInt ByteOffset(BitWidth, 0); while (1) { if (Ptr->getType()->isVectorTy()) break; if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { - APInt GEPOffset(BitWidth, 0); - if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) - break; - ByteOffset += GEPOffset; + if (DL) { + APInt GEPOffset(BitWidth, 0); + if (!GEP->accumulateConstantOffset(*DL, GEPOffset)) + break; + + ByteOffset += GEPOffset; + } + Ptr = GEP->getPointerOperand(); } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { Ptr = cast<Operator>(Ptr)->getOperand(0); @@ -2019,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, /// isKnownNonNull - Return true if we know that the specified value is never /// null. -bool llvm::isKnownNonNull(const Value *V) { +bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa<AllocaInst>(V)) return true; @@ -2030,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) { // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return !GV->hasExternalWeakLinkage(); + + // operator new never returns null. + if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) + return true; + return false; }
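
A closing note on the ComputeMaskedBits changes above: the repeated countTrailingZeros(Align) calls encode one small fact, sketched here with plain integers (illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>

// A pointer aligned to a power-of-two Align has countTrailingZeros(Align)
// known-zero low bits; equivalently, (P & (Align - 1)) == 0.
static uint64_t knownZeroLowBitMask(uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 &&
         "alignment must be a power of two");
  return Align - 1; // e.g. Align = 16 -> mask 0xF: four known-zero bits
}

int main() {
  uint64_t P = 0x1000; // a 16-byte (indeed 4096-byte) aligned address
  assert((P & knownZeroLowBitMask(16)) == 0);
  return 0;
}
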