Diffstat (limited to 'contrib/llvm/lib/Analysis')
62 files changed, 2828 insertions, 1571 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 752edd5..210b80a 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -28,14 +28,14 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; @@ -361,8 +361,28 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { } namespace { + // Conservatively return true. Return false, if there is a single path + // starting from "From" and the path does not reach "To". + static bool hasPath(const BasicBlock *From, const BasicBlock *To) { + const unsigned MaxCheck = 5; + const BasicBlock *Current = From; + for (unsigned I = 0; I < MaxCheck; I++) { + unsigned NumSuccs = Current->getTerminator()->getNumSuccessors(); + if (NumSuccs > 1) + return true; + if (NumSuccs == 0) + return false; + Current = Current->getTerminator()->getSuccessor(0); + if (Current == To) + return true; + } + return true; + } + /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. struct CapturesBefore : public CaptureTracker { CapturesBefore(const Instruction *I, DominatorTree *DT) : BeforeHere(I), DT(DT), Captured(false) {} @@ -372,8 +392,15 @@ namespace { bool shouldExplore(Use *U) { Instruction *I = cast<Instruction>(U->getUser()); BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + // We explore this usage only if the usage can reach "BeforeHere". + // If use is not reachable from entry, there is no need to explore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + // If the value is defined in the same basic block as use and BeforeHere, + // there is no need to explore the use if BeforeHere dominates use. + // Check whether there is a path from I to BeforeHere. + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !hasPath(BB, BeforeHere->getParent())) return false; return true; } @@ -381,8 +408,11 @@ namespace { bool captured(Use *U) { Instruction *I = cast<Instruction>(U->getUser()); BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + // Same logic as in shouldExplore. 
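For illustration only: the bounded reachability test that hasPath() implements above can be sketched in isolation with a toy node type (hypothetical, not llvm::BasicBlock). The strategy is to follow at most a fixed number of single-successor edges and to answer "may reach" conservatively in every other case.

// Standalone sketch of the bounded, conservative reachability check used by
// hasPath() above, on a toy graph type rather than the LLVM CFG.
#include <vector>

struct Node {
  std::vector<const Node *> Succs; // successors of this toy block
};

// Returns false only when a unique path from From provably never reaches To.
static bool mayReach(const Node *From, const Node *To) {
  const unsigned MaxCheck = 5;
  const Node *Cur = From;
  for (unsigned I = 0; I < MaxCheck; ++I) {
    if (Cur->Succs.size() > 1)
      return true;                 // branching: give up conservatively
    if (Cur->Succs.empty())
      return false;                // dead end: To is unreachable from here
    Cur = Cur->Succs.front();
    if (Cur == To)
      return true;
  }
  return true;                     // budget exhausted: assume reachable
}

The conservative default is what keeps the change safe: answering "may reach" never blocks the capture query, it only declines the shortcut of skipping a use.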
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !hasPath(BB, BeforeHere->getParent())) return false; Captured = true; return true; @@ -503,7 +533,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, bool llvm::isNoAliasCall(const Value *V) { if (isa<CallInst>(V) || isa<InvokeInst>(V)) return ImmutableCallSite(cast<Instruction>(V)) - .paramHasAttr(0, Attributes::NoAlias); + .paramHasAttr(0, Attribute::NoAlias); return false; } @@ -525,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } - -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. -bool llvm::isKnownNonNull(const Value *V) { - // Alloca never returns null, malloc might. - if (isa<AllocaInst>(V)) return true; - - // A byval argument is never null. - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - - // Global values are not null unless extern weak. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp index 9f219f5..9f4a47c 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index ac72983..a571463 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -17,19 +17,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden); @@ -44,6 +44,8 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden); static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden); static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden); +static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden); + namespace { class AAEval : public FunctionPass { unsigned NoAlias, MayAlias, PartialAlias, MustAlias; @@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, } } +static inline void +PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + 
errs() << " " << Msg << ": " << *V1 + << " <-> " << *V2 << '\n'; + } +} + static inline bool isInterestingPointer(Value *V) { return V->getType()->isPointerTy() && !isa<ConstantPointerNull>(V); @@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) { SetVector<Value *> Pointers; SetVector<CallSite> CallSites; + SetVector<Value *> Loads; + SetVector<Value *> Stores; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) // Add all pointer arguments. @@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); + if (EvalTBAA && isa<LoadInst>(&*I)) + Loads.insert(&*I); + if (EvalTBAA && isa<StoreInst>(&*I)) + Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast<Value>(&Inst)) { Value *Callee = CS.getCalledValue(); @@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) { } } + if (EvalTBAA) { + // iterate over all pairs of load, store + for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); + I2 != E2; ++I2) { + switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + + // iterate over all pairs of store, store + for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { + switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + } + // Mod/ref alias analysis: compare all pairs of calls and values for (SetVector<CallSite>::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp index f15c051..f6178e3 100644 --- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp +++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp @@ -17,12 +17,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" 
+#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include <set> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 388c755..5910526 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -13,13 +13,13 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 9dc81a6..66e416cd 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -9,8 +9,8 @@ #include "llvm-c/Analysis.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/InitializePasses.h" #include <cstring> using namespace llvm; @@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); - initializePrintDbgInfoPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); @@ -70,6 +69,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeRegionOnlyPrinterPass(Registry); initializeScalarEvolutionPass(Registry); initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeTargetTransformInfoAnalysisGroup(Registry); initializeTypeBasedAliasAnalysisPass(Registry); } diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 4bb93ee..ae6da1a 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -13,28 +13,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" -#include "llvm/Pass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include 
"llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -631,7 +631,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { // For intrinsics, we can check the table. if (unsigned iid = F->getIntrinsicID()) { #define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/Intrinsics.gen" +#include "llvm/IR/Intrinsics.gen" #undef GET_INTRINSIC_MODREF_BEHAVIOR } @@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + // Check for geps of non-aliasing underlying pointers where the offsets are // identical. - if (V1Size == V2Size) { + if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1TBAAInfo, UnderlyingV2, @@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, GEP1VariableIndices.clear(); } } - - // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. @@ -1064,39 +1064,20 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, Location(V2, V2Size, V2TBAAInfo)); if (PN > V2) std::swap(Locs.first, Locs.second); - - AliasResult Alias = - aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, - PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), - V2Size, V2TBAAInfo); - if (Alias == MayAlias) - return MayAlias; - - // If the first source of the PHI nodes NoAlias and the other inputs are - // the PHI node itself through some amount of recursion this does not add - // any new information so just return NoAlias. - // bb: - // ptr = ptr2 + 1 - // loop: - // ptr_phi = phi [bb, ptr], [loop, ptr_plus_one] - // ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one] - // ... - // ptr_plus_one = gep ptr_phi, 1 - // ptr2_plus_one = gep ptr2_phi, 1 - // We assume for the recursion that the the phis (ptr_phi, ptr2_phi) do - // not alias each other. - bool ArePhisAssumedNoAlias = false; - AliasResult OrigAliasResult = NoAlias; - if (Alias == NoAlias) { - // Pretend the phis do not alias. - assert(AliasCache.count(Locs) && - "There must exist an entry for the phi node"); - OrigAliasResult = AliasCache[Locs]; - AliasCache[Locs] = NoAlias; - ArePhisAssumedNoAlias = true; - } - - for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + // Analyse the PHIs' inputs under the assumption that the PHIs are + // NoAlias. 
+ // If the PHIs are May/MustAlias there must be (recursively) an input + // operand from outside the PHIs' cycle that is MayAlias/MustAlias or + // there must be an operation on the PHIs within the PHIs' value cycle + // that causes a MayAlias. + // Pretend the phis do not alias. + AliasResult Alias = NoAlias; + assert(AliasCache.count(Locs) && + "There must exist an entry for the phi node"); + AliasResult OrigAliasResult = AliasCache[Locs]; + AliasCache[Locs] = NoAlias; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), @@ -1107,7 +1088,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, } // Reset if speculation failed. - if (ArePhisAssumedNoAlias && Alias != NoAlias) + if (Alias != NoAlias) AliasCache[Locs] = OrigAliasResult; return Alias; diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 8a660f7..100e5c8 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 04a6560..6c58856 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp index 7685400..9b6879a 100644 --- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -18,7 +18,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" - #include "llvm/Pass.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index d9c0299..a729270 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -18,7 +18,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" + using namespace llvm; 
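As a side note on the aliasPHI() rewrite earlier in this diff: it relies on seeding the alias cache with an optimistic answer before recursing, then restoring the old entry if the speculation fails. A generic, hypothetical sketch of that assume-then-verify pattern (toy types, not the real AliasCache/Location machinery) looks like this:

#include <map>
#include <utility>

enum ToyResult { ToyNoAlias, ToyMayAlias };

// Stand-in for AliasCache; a key identifies the pair of values being compared.
static std::map<std::pair<int, int>, ToyResult> Cache;

// Placeholder for the recursive pairwise check of the phis' incoming values;
// in the real pass this re-enters aliasCheck() and may consult the cache.
static ToyResult verifyInputs(int A, int B) { (void)A; (void)B; return ToyMayAlias; }

static ToyResult queryWithSpeculation(int A, int B) {
  std::pair<int, int> Key(A, B);
  ToyResult Orig = Cache[Key];       // remember whatever was recorded before
  Cache[Key] = ToyNoAlias;           // speculate: the cyclic values do not alias
  ToyResult R = verifyInputs(A, B);  // recursion terminates on the optimistic entry
  if (R != ToyNoAlias)
    Cache[Key] = Orig;               // speculation failed: undo the cache entry
  return R;
}

Seeding the cache is what lets the query over a phi cycle terminate; the rollback keeps a failed speculation from leaking into later queries.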
CaptureTracker::~CaptureTracker() {} diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index 651a54b..8cda01a 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -12,121 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CallSite.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DataLayout.h" using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(ImmutableCallSite CS) { - if (isa<IntrinsicInst>(CS.getInstruction())) - return true; - - const Function *F = CS.getCalledFunction(); - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - -bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { - if (isa<PHINode>(I)) - return true; - - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) - return GEP->hasAllConstantIndices(); - - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return true; - } - } - - if (const CastInst *CI = dyn_cast<CastInst>(I)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast()) - return true; - - Value *Op = CI->getOperand(0); - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - if (isa<IntToPtrInst>(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) - return true; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. 
- if (isa<PtrToIntInst>(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) - return true; - - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (TD && isa<TruncInst>(CI) && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - return true; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - return true; - } - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const DataLayout *TD) { + const TargetTransformInfo &TTI) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isInstructionFree(II, TD)) - continue; - // Special handling for calls. if (isa<CallInst>(II) || isa<InvokeInst>(II)) { ImmutableCallSite CS(cast<Instruction>(II)); @@ -144,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for that case. if (F == BB->getParent()) isRecursive = true; - } - - if (!callIsSmall(CS)) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. if (!isa<InlineAsm>(CS.getCalledValue())) @@ -165,7 +64,15 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; - ++NumInsts; + if (const CallInst *CI = dyn_cast<CallInst>(II)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II)) + if (InvI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + NumInsts += TTI.getUserCost(&*II); } if (isa<ReturnInst>(BB->getTerminator())) @@ -182,23 +89,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. - if (isa<IndirectBrInst>(BB->getTerminator())) - containsIndirectBr = true; + notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator()); // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } - -void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->getFnAttributes().hasAttribute(Attributes::ReturnsTwice); - - // Look at the size of the callee. 
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 91a5b84..09d7608 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -9,30 +9,30 @@ // // This file defines routines for folding instructions into constants. // -// Also, to supplement the basic VMCore ConstantExpr simplifications, +// Also, to supplement the basic IR ConstantExpr simplifications, // this file defines some additional folding routines that can make use of -// DataLayout information. These functions cannot go in VMCore due to library +// DataLayout information. These functions cannot go in IR due to library // dependency issues. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Operator.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FEnv.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/FEnv.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <cerrno> #include <cmath> using namespace llvm; @@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) { - ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); - if (CDV == 0) + VectorType *VTy = dyn_cast<VectorType>(C->getType()); + if (VTy == 0) return ConstantExpr::getBitCast(C, DestTy); - unsigned NumSrcElts = CDV->getType()->getNumElements(); - - Type *SrcEltTy = CDV->getType()->getElementType(); + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int // to simplify things. @@ -68,11 +67,14 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); - // Ask VMCore to do the conversion now that #elts line up. + // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CDV = cast<ConstantDataVector>(C); } + ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. 
unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -104,7 +106,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C)) return ConstantExpr::getBitCast(C, DestTy); - // If the element types match, VMCore can fold it. + // If the element types match, IR can fold it. unsigned NumDstElt = DestVTy->getNumElements(); unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) @@ -131,7 +133,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - // Finally, VMCore can handle this now that #elts line up. + // Finally, IR can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } @@ -141,9 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); - // Ask VMCore to do the conversion now that #elts line up. + // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - // If VMCore wasn't able to fold it, bail out. + // If IR wasn't able to fold it, bail out. if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector. !isa<ConstantDataVector>(C)) return C; @@ -218,10 +220,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast<GlobalValue>(C))) { - Offset = 0; + Offset.clearAllBits(); return true; } @@ -235,34 +237,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (CE->getOpcode() == Instruction::GetElementPtr) { - // Cannot compute this if the element type of the pointer is missing size - // info. - if (!cast<PointerType>(CE->getOperand(0)->getType()) - ->getElementType()->isSized()) - return false; - + if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) { // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. - gep_type_iterator GTI = gep_type_begin(CE); - for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); - i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast<ConstantInt>(*i); - if (!CI) return false; // Index isn't a simple constant? - if (CI->isZero()) continue; // Not adding anything. 
- - if (StructType *ST = dyn_cast<StructType>(*GTI)) { - // N = N + Offset - Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); - } else { - SequentialType *SQT = cast<SequentialType>(*GTI); - Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); - } - } - return true; + return GEP->accumulateConstantOffset(TD, Offset); } return false; @@ -310,6 +291,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } return false; } @@ -402,7 +387,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; - if (LoadTy->isFloatTy()) + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext()); + else if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); @@ -423,7 +410,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, if (BytesLoaded > 32 || BytesLoaded == 0) return 0; GlobalValue *GVal; - int64_t Offset; + APInt Offset(TD.getPointerSizeInBits(), 0); if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -434,14 +421,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset < 0) return 0; + if (Offset.isNegative()) return 0; // If we're not accessing anything in this constant, the result is undefined. - if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; - if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) return 0; @@ -550,10 +538,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, -/// otherwise TD is null. +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *TD){ + Constant *Op1, const DataLayout *DL){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -561,17 +549,44 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // bits. + if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. 
+ return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && TD) { + if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - int64_t Offs1, Offs2; + unsigned PtrSize = DL->getPointerSizeInBits(); + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. - return ConstantInt::get(Op0->getType(), Offs1-Offs2); + // PtrToInt may change the bitwidth so we have convert to the right size + // first. + return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); } } @@ -1104,6 +1119,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1142,8 +1164,7 @@ llvm::canConstantFoldCallTo(const Function *F) { switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || - Name == "atan" || Name == "atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': @@ -1171,11 +1192,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1187,11 +1214,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1243,7 +1276,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (!TLI) return 0; - if 
(!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1256,8 +1289,46 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : - Op->getValueAPF().convertToDouble(); + double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); +#endif + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1299,7 +1370,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && - (Ty->isFloatTy() || Ty->isDoubleTy())) { + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1337,7 +1408,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { - APFloat Val(Op->getValue()); + APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; APFloat::opStatus status = @@ -1391,18 +1462,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? - (double)Op1->getValueAPF().convertToFloat() : - Op1->getValueAPF().convertToDouble(); + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? 
- (double)Op2->getValueAPF().convertToFloat(): - Op2->getValueAPF().convertToDouble(); + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); @@ -1416,6 +1504,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, @@ -1468,12 +1560,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands, return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); } case Intrinsic::cttz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); case Intrinsic::ctlz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 5adbf45..98a7780 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -8,20 +8,24 @@ //===----------------------------------------------------------------------===// // // This file defines the cost model analysis. It provides a very basic cost -// estimation for LLVM-IR. The cost result can be thought of as cycles, but it -// is really unit-less. The estimated cost is ment to be used for comparing -// alternatives. +// estimation for LLVM-IR. This analysis uses the services of the codegen +// to approximate the cost of any IR instruction when lowered to machine +// instructions. The cost results are unit-less and the cost number represents +// the throughput of the machine assuming that all loads hit the cache, all +// branches are predicted, etc. The cost numbers can be added in order to +// compare two or more transformation alternatives. 
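To make the header comment above concrete, a hypothetical consumer of these numbers (not part of this pass, and assuming unknown costs are reported as (unsigned)-1, as getInstructionCost() does in the code below) could simply accumulate them per candidate sequence and prefer the smaller total:

#include <vector>

// Sum the unit-less per-instruction costs for one lowering alternative,
// skipping entries whose cost is unknown rather than letting them poison
// the total.
static unsigned accumulateCost(const std::vector<unsigned> &PerInstCost) {
  unsigned Total = 0;
  for (unsigned C : PerInstCost)
    if (C != (unsigned)-1)
      Total += C;
  return Total;
}

// e.g. accumulateCost({1, 1, 4}) == 6 vs. accumulateCost({1, 2, 2}) == 5:
// the second lowering of the same source construct would be preferred.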
// //===----------------------------------------------------------------------===// #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME #include "llvm/Analysis/Passes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/TargetTransformInfo.h" -#include "llvm/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -31,7 +35,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -40,7 +44,7 @@ namespace { /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - unsigned getInstructionCost(Instruction *I) const; + unsigned getInstructionCost(const Instruction *I) const; private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -49,8 +53,8 @@ namespace { /// The function that we analyze. Function *F; - /// Vector target information. - const VectorTargetTransformInfo *VTTI; + /// Target information. + const TargetTransformInfo *TTI; }; } // End of anonymous namespace @@ -72,25 +76,49 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool CostModelAnalysis::runOnFunction(Function &F) { this->F = &F; - - // Target information. - TargetTransformInfo *TTI; TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - if (TTI) - VTTI = TTI->getVectorTargetTransformInfo(); return false; } -unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { - if (!VTTI) +static bool isReverseVectorMask(SmallVector<int, 16> &Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant. 
+ ConstantDataVector *CDV = 0; + if ((CDV = dyn_cast<ConstantDataVector>(V))) + if (CDV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + ConstantVector *CV = 0; + if ((CV = dyn_cast<ConstantVector>(V))) + if (CV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + + return OpInfo; +} + +unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { + if (!TTI) return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { - return VTTI->getCFInstrCost(I->getOpcode()); + return TTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: @@ -110,28 +138,33 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK); } case Instruction::Select: { - SelectInst *SI = cast<SelectInst>(I); + const SelectInst *SI = cast<SelectInst>(I); Type *CondTy = SI->getCondition()->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { - StoreInst *SI = cast<StoreInst>(I); + const StoreInst *SI = cast<StoreInst>(I); Type *ValTy = SI->getValueOperand()->getType(); - return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + return TTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { - LoadInst *LI = cast<LoadInst>(I); - return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + const LoadInst *LI = cast<LoadInst>(I); + return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } @@ -148,26 +181,47 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { - ExtractElementInst * EEI = cast<ExtractElementInst>(I); + const ExtractElementInst * EEI = cast<ExtractElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { - InsertElementInst * IE = cast<InsertElementInst>(I); + const InsertElementInst * IE = cast<InsertElementInst>(I); ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - 
IE->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, + 0); + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + SmallVector<Type*, 4> Tys; + for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) + Tys.push_back(II->getArgOperand(J)->getType()); + + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Tys); + } + return -1; default: // We don't have any information on this instruction. return -1; diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp deleted file mode 100644 index 41cd34c..0000000 --- a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp +++ /dev/null @@ -1,224 +0,0 @@ -//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that prints instructions, and associated debug -// info: -// -// - source/line/col information -// - original variable name -// - original type name -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt<bool> -PrintDirectory("print-fullpath", - cl::desc("Print fullpath when printing debug info"), - cl::Hidden); - -namespace { - class PrintDbgInfo : public FunctionPass { - raw_ostream &Out; - void printVariableDeclaration(const Value *V); - public: - static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(ID), Out(errs()) { - initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; - char PrintDbgInfo::ID = 0; -} - -INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", - "Print debug info in human readable form", false, false) - -FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } - -/// Find the debug info descriptor corresponding to this global variable. -static Value *findDbgGlobalDeclare(GlobalVariable *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); - if (!DIG.isGlobalVariable()) - continue; - if (DIGlobalVariable(DIG).getGlobal() == V) - return DIG; - } - return 0; -} - -/// Find the debug info descriptor corresponding to this function. 
-static Value *findDbgSubprogramDeclare(Function *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); - if (!DIG.isSubprogram()) - continue; - if (DISubprogram(DIG).getFunction() == V) - return DIG; - } - return 0; -} - -/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. -/// It looks through pointer casts too. -static const DbgDeclareInst *findDbgDeclare(const Value *V) { - V = V->stripPointerCasts(); - - if (!isa<Instruction>(V) && !isa<Argument>(V)) - return 0; - - const Function *F = NULL; - if (const Instruction *I = dyn_cast<Instruction>(V)) - F = I->getParent()->getParent(); - else if (const Argument *A = dyn_cast<Argument>(V)) - F = A->getParent(); - - for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - if (DDI->getAddress() == V) - return DDI; - - return 0; -} - -static bool getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, - std::string &File, std::string &Dir) { - DICompileUnit Unit; - DIType TypeD; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) { - Value *DIGV = findDbgGlobalDeclare(GV); - if (!DIGV) return false; - DIGlobalVariable Var(cast<MDNode>(DIGV)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){ - Value *DIF = findDbgSubprogramDeclare(F); - if (!DIF) return false; - DISubprogram Var(cast<MDNode>(DIF)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else { - const DbgDeclareInst *DDI = findDbgDeclare(V); - if (!DDI) return false; - DIVariable Var(cast<MDNode>(DDI->getVariable())); - - StringRef D = Var.getName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } - - StringRef T = TypeD.getName(); - if (!T.empty()) - Type = T; - StringRef F = Unit.getFilename(); - if (!F.empty()) - File = F; - StringRef D = Unit.getDirectory(); - if (!D.empty()) - Dir = D; - return true; -} - -void PrintDbgInfo::printVariableDeclaration(const Value *V) { - std::string DisplayName, File, Directory, Type; - unsigned LineNo = 0; - - if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) - return; - - Out << "; "; - WriteAsOperand(Out, V, false, 0); - if (isa<Function>(V)) - Out << " is function " << DisplayName - << " of type " << Type << " declared at "; - else - Out << " is variable " << DisplayName - << " of type " << Type << " declared at "; - - if (PrintDirectory) - Out << Directory << "/"; - - Out << File << ":" << LineNo << "\n"; -} - -bool PrintDbgInfo::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - Out << "function " << F.getName() << "\n\n"; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *BB = I; - - if (I != F.begin() && (pred_begin(BB) == pred_end(BB))) - // Skip dead blocks. 
- continue; - - Out << BB->getName(); - Out << ":"; - - Out << "\n"; - - for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); - i != e; ++i) { - - printVariableDeclaration(i); - - if (const User *U = dyn_cast<User>(i)) { - for(unsigned i=0;i<U->getNumOperands();i++) - printVariableDeclaration(U->getOperand(i)); - } - } - } - return false; -} diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index 95ac5ea..cbc71bd 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -55,12 +55,12 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -145,22 +145,20 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { // Used to test the dependence analyzer. -// Looks through the function, noting the first store instruction -// and the first load instruction -// (which always follows the first load in our tests). -// Calls depends() and prints out the result. +// Looks through the function, noting loads and stores. +// Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. static void dumpExampleDependence(raw_ostream &OS, Function *F, DependenceAnalysis *DA) { for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { - if (const StoreInst *Src = dyn_cast<StoreInst>(&*SrcI)) { + if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) { for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { - if (const LoadInst *Dst = dyn_cast<LoadInst>(&*DstI)) { + if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) { OS << "da analyze - "; - if (Dependence *D = DA->depends(Src, Dst, true)) { + if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { @@ -173,7 +171,6 @@ void dumpExampleDependence(raw_ostream &OS, Function *F, } else OS << "none!\n"; - return; } } } @@ -224,8 +221,8 @@ bool Dependence::isScalar(unsigned level) const { //===----------------------------------------------------------------------===// // FullDependence methods -FullDependence::FullDependence(const Instruction *Source, - const Instruction *Destination, +FullDependence::FullDependence(Instruction *Source, + Instruction *Destination, bool PossiblyLoopIndependent, unsigned CommonLevels) : Dependence(Source, Destination), @@ -586,42 +583,40 @@ void Dependence::dump(raw_ostream &OS) const { else if (isInput()) OS << "input"; unsigned Levels = getLevels(); - if (Levels) { - OS << " ["; - for (unsigned II = 1; II <= Levels; ++II) { - if (isSplitable(II)) - Splitable = true; - if (isPeelFirst(II)) - OS << 'p'; - const SCEV *Distance = getDistance(II); - if (Distance) - OS << *Distance; - else if (isScalar(II)) - OS << "S"; + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + 
OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; else { - unsigned Direction = getDirection(II); - if (Direction == DVEntry::ALL) - OS << "*"; - else { - if (Direction & DVEntry::LT) - OS << "<"; - if (Direction & DVEntry::EQ) - OS << "="; - if (Direction & DVEntry::GT) - OS << ">"; - } + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; } - if (isPeelLast(II)) - OS << 'p'; - if (II < Levels) - OS << " "; } - if (isLoopIndependent()) - OS << "|<"; - OS << "]"; - if (Splitable) - OS << " splitable"; + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; } OS << "!\n"; } @@ -652,10 +647,10 @@ bool isLoadOrStore(const Instruction *I) { static -const Value *getPointerOperand(const Instruction *I) { - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) +Value *getPointerOperand(Instruction *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); - if (const StoreInst *SI = dyn_cast<StoreInst>(I)) + if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); return 0; @@ -2215,13 +2210,13 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { // // It occurs to me that the presence of loop-invariant variables // changes the nature of the test from "greatest common divisor" -// to "a common divisor!" +// to "a common divisor". bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEV *Dst, FullDependence &Result) const { DEBUG(dbgs() << "starting gcd\n"); ++GCDapplications; - unsigned BitWidth = Src->getType()->getIntegerBitWidth(); + unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); APInt RunningGCD = APInt::getNullValue(BitWidth); // Examine Src coefficients. @@ -3197,42 +3192,42 @@ static void dumpSmallBitVector(SmallBitVector &BV) { // Goff, Kennedy, Tseng // PLDI 1991 // -// Care is required to keep the code below up to date w.r.t. this routine. -Dependence *DependenceAnalysis::depends(const Instruction *Src, - const Instruction *Dst, +// Care is required to keep the routine below, getSplitIteration(), +// up to date with respect to this routine. +Dependence *DependenceAnalysis::depends(Instruction *Src, + Instruction *Dst, bool PossiblyLoopIndependent) { + if (Src == Dst) + PossiblyLoopIndependent = false; + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence return NULL; - if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. + DEBUG(dbgs() << "can only handle simple loads and stores\n"); return new Dependence(Src, Dst); + } - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { case AliasAnalysis::MayAlias: case AliasAnalysis::PartialAlias: // cannot analyse objects if we don't understand their aliasing. + DEBUG(dbgs() << "can't analyze may or partial alias\n"); return new Dependence(Src, Dst); case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. 
+ DEBUG(dbgs() << "no alias\n"); return NULL; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. } - const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); - const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); - if (!SrcGEP || !DstGEP) - return new Dependence(Src, Dst); // missing GEP, assume dependence - - if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) - return new Dependence(Src, Dst); // different types, assume dependence - // establish loop nesting levels establishNestingLevels(Src, Dst); DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); @@ -3241,36 +3236,62 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); + GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); + DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); + + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector<Subscript, 4> Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - removeMatchingExtensions(&Pair[Pairs]); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); - DEBUG(dbgs() << " subscript " << Pairs << "\n"); - DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); - DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); - DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); + if (UsefulGEP) { + DEBUG(dbgs() << " using GEPs\n"); + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + DEBUG(dbgs() << " ignoring GEPs\n"); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + 
removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); + DEBUG(dbgs() << " subscript " << P << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); DEBUG(dbgs() << "\tloops = "); - DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); + DEBUG(dumpSmallBitVector(Pair[P].Loops)); } SmallBitVector Separable(Pairs); @@ -3532,7 +3553,7 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } - // make sure Scalar flags are set correctly + // Make sure the Scalar flags are set correctly. SmallBitVector CompleteLoops(MaxLevels + 1); for (unsigned SI = 0; SI < Pairs; ++SI) CompleteLoops |= Pair[SI].Loops; @@ -3540,8 +3561,10 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, if (CompleteLoops[II]) Result.DV[II - 1].Scalar = false; - // make sure loopIndepent flag is set correctly if (PossiblyLoopIndependent) { + // Make sure the LoopIndependent flag is set correctly. + // All directions must include equal, otherwise no + // loop-independent dependence is possible. for (unsigned II = 1; II <= CommonLevels; ++II) { if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { Result.LoopIndependent = false; @@ -3549,6 +3572,19 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } } + else { + // On the other hand, if all directions are equal and there's no + // loop-independent dependence possible, then no dependence exists. + bool AllEqual = true; + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (Result.getDirection(II) != Dependence::DVEntry::EQ) { + AllEqual = false; + break; + } + } + if (AllEqual) + return NULL; + } FullDependence *Final = new FullDependence(Result); Result.DV = NULL; @@ -3565,7 +3601,8 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, // though simplified since we know that the dependence exists. // It's tedious, since we must go through all propagations, etc. // -// Care is required to keep this code up to date w.r.t. the code above. +// Care is required to keep this code up to date with respect to the routine +// above, depends(). 
// // Generally, the dependence analyzer will be used to build // a dependence graph for a function (basically a map from instructions @@ -3608,50 +3645,65 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, assert(Dep && "expected a pointer to a Dependence"); assert(Dep->isSplitable(SplitLevel) && "Dep should be splitable at SplitLevel"); - const Instruction *Src = Dep->getSrc(); - const Instruction *Dst = Dep->getDst(); + Instruction *Src = Dep->getSrc(); + Instruction *Dst = Dep->getDst(); assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); assert(isLoadOrStore(Src)); assert(isLoadOrStore(Dst)); - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == AliasAnalysis::MustAlias); - const GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); - const GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); - assert(SrcGEP); - assert(DstGEP); - assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); // establish loop nesting levels establishNestingLevels(Src, Dst); FullDependence Result(Src, Dst, false, CommonLevels); - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr); + GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector<Subscript, 4> Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); + if (UsefulGEP) { + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); } SmallBitVector Separable(Pairs); diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp index 3e537e9..7e4a89f 100644 --- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp index dec0ece..7620fd9 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index 449b7ee..a0d788f 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -16,13 +16,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "cgscc-passmgr" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/PassManagers.h" -#include "llvm/Analysis/CallGraph.h" +#include 
"llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/PassManagers.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" @@ -51,6 +51,9 @@ public: /// whether any of the passes modifies the module, and if so, return true. bool runOnModule(Module &M); + using ModulePass::doInitialization; + using ModulePass::doFinalization; + bool doInitialization(CallGraph &CG); bool doFinalization(CallGraph &CG); diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp new file mode 100644 index 0000000..306ae7a --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp @@ -0,0 +1,87 @@ +//===- CallPrinter.cpp - DOT printer for call graph -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-callgraph', which emit a callgraph.<fnname>.dot +// containing the call graph of a module. +// +// There is also a pass available to directly call dotty ('-view-callgraph'). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" + +using namespace llvm; + +namespace llvm { + +template<> +struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(CallGraph *Graph) { + return "Call graph"; + } + + std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { + if (Function *Func = Node->getFunction()) + return Func->getName(); + + return "external node"; + } +}; + +} // end llvm namespace + +namespace { + +struct CallGraphViewer + : public DOTGraphTraitsModuleViewer<CallGraph, true> { + static char ID; + + CallGraphViewer() + : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) { + initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct CallGraphPrinter + : public DOTGraphTraitsModulePrinter<CallGraph, true> { + static char ID; + + CallGraphPrinter() + : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} // end anonymous namespace + +char CallGraphViewer::ID = 0; +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", + "View call graph", + false, false) + +char CallGraphPrinter::ID = 0; +INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", + "Print call graph to 'dot' file", + false, false) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. 
+ +ModulePass *llvm::createCallGraphViewerPass() { + return new CallGraphViewer(); +} + +ModulePass *llvm::createCallGraphPrinterPass() { + return new CallGraphPrinter(); +} diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp index e9df3ca..1c4f17d 100644 --- a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp @@ -14,10 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp index 990caa8..92d0d23 100644 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -16,20 +16,20 @@ #define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SCCIterator.h" #include <set> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp index 0ba2e04..aa5164e 100644 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -20,6 +20,8 @@ using namespace llvm; void llvm::initializeIPA(PassRegistry &Registry) { initializeBasicCallGraphPass(Registry); initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp index 5f51f77..35c45e6 100644 --- a/contrib/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -13,23 +13,24 @@ #define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include 
"llvm/Support/raw_ostream.h" -#include "llvm/CallingConv.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/GlobalAlias.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; @@ -44,17 +45,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { // DataLayout if available, or null. const DataLayout *const TD; + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + // The called function. Function &F; int Threshold; int Cost; - const bool AlwaysInline; bool IsCallerRecursive; bool IsRecursiveCall; bool ExposesReturnsTwice; bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; @@ -95,6 +100,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); // Custom analysis routines. @@ -123,26 +129,27 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); public: - CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) - : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { - } + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); int getThreshold() { return Threshold; } int getCost() { return Cost; } - bool isAlwaysInline() { return AlwaysInline; } // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. @@ -281,9 +288,8 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { Ty->getPrimitiveSizeInBits()); } - // We will happily inline static alloca instructions or dynamic alloca - // instructions in always-inline situations. - if (AlwaysInline || I.isStaticAlloca()) + // We will happily inline static alloca instructions. 
+ if (I.isStaticAlloca()) return Base::visitAlloca(I); // FIXME: This is overly conservative. Dynamic allocas are inefficient for @@ -357,7 +363,10 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { bool CallAnalyzer::visitBitCast(BitCastInst &I) { // Propagate constants through bitcasts. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -382,7 +391,10 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) { bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -410,12 +422,15 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) SROAArgValues[&I] = SROAArg; - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -437,12 +452,15 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) SROAArgValues[&I] = SROAArg; - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitCastInst(CastInst &I) { // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast<Constant>(I.getOperand(0))) + Constant *COp = dyn_cast<Constant>(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { SimplifiedValues[&I] = C; return true; @@ -451,15 +469,17 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. disableSROA(I.getOperand(0)); - return isInstructionFree(&I, TD); + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); } bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Value *Operand = I.getOperand(0); - Constant *Ops[1] = { dyn_cast<Constant>(Operand) }; - if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) + Constant *COp = dyn_cast<Constant>(Operand); + if (!COp) + COp = SimplifiedValues.lookup(Operand); + if (COp) if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - Ops, TD)) { + COp, TD)) { SimplifiedValues[&I] = C; return true; } @@ -612,28 +632,105 @@ bool CallAnalyzer::visitStore(StoreInst &I) { return false; } +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. 
+ Constant *C = dyn_cast<Constant>(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector<Constant *, 4> ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast<Constant>(*I); + if (!C) + C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + bool CallAnalyzer::visitCallSite(CallSite CS) { if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() && - !F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice)) { + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; } + if (CS.isCall() && + cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: - return Base::visitCallSite(CS); + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - // SROA can usually chew through these intrinsics, but they aren't free. - return false; + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } } - } - if (Function *F = CS.getCalledFunction()) { if (F == CS.getInstruction()->getParent()->getParent()) { // This flag will fully abort the analysis, so don't bother with anything // else. @@ -641,7 +738,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return false; } - if (!callIsSmall(CS)) { + if (TTI.isLoweredToCall(F)) { // We account for the average 1 instruction per call argument setup // here. Cost += CS.arg_size() * InlineConstants::InstrCost; @@ -674,7 +771,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -687,7 +784,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. - if (isInstructionFree(&I, TD)) + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) return true; // We found something we don't understand or can't handle. Mark any SROA-able @@ -743,7 +840,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { // Check if we've past the threshold so we don't spin in huge basic // blocks that will never inline. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + if (Cost > (Threshold + VectorBonus)) return false; } @@ -794,7 +891,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by -/// some artifact of the rountine. +/// some artifact of the routine. bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; @@ -805,70 +902,71 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { int SingleBBBonus = Threshold / 2; Threshold += SingleBBBonus; - // Unless we are always-inlining, perform some tweaks to the cost and - // threshold based on the direct callsite information. - if (!AlwaysInline) { - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; - - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. 
- PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); - - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; } + } - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero - // cost. - Instruction *Instr = CS.getInstruction(); - if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { - if (isa<UnreachableInst>(II->getNormalDest()->begin())) - Threshold = 1; - } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. 
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) Threshold = 1; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) + Threshold = 1; - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; - // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) - return false; - } + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; if (F.empty()) return true; @@ -930,7 +1028,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + if (Cost > (Threshold + VectorBonus)) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1013,9 +1111,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). 
+ if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + Threshold += VectorBonus; - return AlwaysInline || Cost < Threshold; + return Cost < Threshold; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1030,28 +1134,67 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); #undef DEBUG_PRINT_STAT } #endif -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetTransformInfo>(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable<DataLayout>(); + TTI = &getAnalysis<TargetTransformInfo>(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, int Threshold) { + // Cannot inline indirect calls. + if (!Callee) + return llvm::InlineCost::getNever(); + + // Calls to functions with always-inline attributes should be inlined + // whenever possible. + if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)) { + if (isInlineViable(*Callee)) + return llvm::InlineCost::getAlways(); + return llvm::InlineCost::getNever(); + } + // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. - if (!Callee || Callee->mayBeOverridden() || - Callee->getFnAttributes().hasAttribute(Attributes::NoInline) || + if (Callee->mayBeOverridden() || + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline) || CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TD, *Callee, Threshold); + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1059,9 +1202,38 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, // Check if there was a reason to force inlining or no inlining. if (!ShouldInline && CA.getCost() < CA.getThreshold()) return InlineCost::getNever(); - if (ShouldInline && (CA.isAlwaysInline() || - CA.getCost() >= CA.getThreshold())) + if (ShouldInline && CA.getCost() >= CA.getThreshold()) return InlineCost::getAlways(); return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } + +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. 
+ if (isa<IndirectBrInst>(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. + if (!ReturnsTwice && CS.isCall() && + cast<CallInst>(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index d4221b8..b33e2cb 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -14,17 +14,17 @@ #define DEBUG_TYPE "iv-users" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Type.h" -#include "llvm/DerivedTypes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" #include "llvm/Assembly/Writer.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp index 3b385d2..75a49eb 100644 --- a/contrib/llvm/lib/Analysis/InstCount.cpp +++ b/contrib/llvm/lib/Analysis/InstCount.cpp @@ -13,13 +13,13 @@ #define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" -#include "llvm/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); @@ -30,7 +30,7 @@ STATISTIC(TotalMemInst, "Number of memory instructions"); #define HANDLE_INST(N, OPCODE, CLASS) \ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" namespace { @@ -43,7 +43,7 @@ namespace { #define HANDLE_INST(N, OPCODE, CLASS) \ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index a76e5ad..4a3c74e 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -18,20 +18,20 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/GlobalAlias.h" -#include "llvm/Operator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include 
"llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/DataLayout.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -657,51 +657,26 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } -/// \brief Accumulate the constant integer offset a GEP represents. -/// -/// Given a getelementptr instruction/constantexpr, accumulate the constant -/// offset from the base pointer into the provided APInt 'Offset'. Returns true -/// if the GEP has all-constant indices. Returns false if any non-constant -/// index is encountered leaving the 'Offset' in an undefined state. The -/// 'Offset' APInt must be the bitwidth of the target's pointer size. -static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, - APInt &Offset) { - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - assert(IntPtrWidth == Offset.getBitWidth()); - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(*I); - if (!OpC) return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType())); - Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; - } - return true; -} - /// \brief Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. -static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, +/// +/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't +/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. +/// folding. +static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Value *&V) { - if (!V->getType()->isPointerTy()) - return 0; + assert(V->getType()->getScalarType()->isPointerTy()); + + // Without DataLayout, just be conservative for now. Theoretically, more could + // be done in this case. 
+ if (!TD) + return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned IntPtrWidth = TD->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -710,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset)) + if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -722,23 +697,24 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, } else { break; } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + assert(V->getType()->getScalarType()->isPointerTy() && + "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext()); - return ConstantInt::get(IntPtrTy, Offset); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + if (V->getType()->isVectorTy()) + return ConstantVector::getSplat(V->getType()->getVectorNumElements(), + OffsetIntPtr); + return OffsetIntPtr; } /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. -static Constant *computePointerDifference(const DataLayout &TD, +static Constant *computePointerDifference(const DataLayout *TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. @@ -852,9 +828,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). - if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) - if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + if (Constant *Result = computePointerDifference(Q.TD, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // Mul distributes over Sub. Try some generic simplifications based on this. @@ -886,6 +862,112 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } +/// Given operands for an FAdd, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. 
+ std::swap(Op0, Op1); + } + + // fadd X, -0 ==> X + if (match(Op1, m_NegZero())) + return Op0; + + // fadd X, 0 ==> X, when we know X is not -0 + if (match(Op1, m_Zero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 + // where nnan and ninf have to occur at least once somewhere in this + // expression + Value *SubOp = 0; + if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) + SubOp = Op1; + else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) + SubOp = Op0; + if (SubOp) { + Instruction *FSub = cast<Instruction>(SubOp); + if ((FMF.noNaNs() || FSub->hasNoNaNs()) && + (FMF.noInfs() || FSub->hasNoInfs())) + return Constant::getNullValue(Op0->getType()); + } + + return 0; +} + +/// Given operands for an FSub, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + } + + // fsub X, 0 ==> X + if (match(Op1, m_Zero())) + return Op0; + + // fsub X, -0 ==> X, when we know X is not -0 + if (match(Op1, m_NegZero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fsub 0, (fsub -0.0, X) ==> X + Value *X; + if (match(Op0, m_AnyZero())) { + if (match(Op1, m_FSub(m_NegZero(), m_Value(X)))) + return X; + if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) + return X; + } + + // fsub nnan ninf x, x ==> 0.0 + if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + return 0; +} + +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // fmul X, 1.0 ==> X + if (match(Op1, m_FPOne())) + return Op0; + + // fmul nnan nsz X, 0 ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) + return Op1; + + return 0; +} + /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. 
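The FAdd/FSub/FMul folds above rest on IEEE identities that hold for any non-NaN value: fadd X, -0.0, fsub X, 0.0 and fmul X, 1.0 all return X with its sign intact, while fadd X, +0.0 can flip the sign of -0.0, which is why that fold needs nsz or CannotBeNegativeZero. A minimal standalone check of those identities (plain C++, not the LLVM API):

#include <cmath>
#include <cstdio>

// Equality that also requires the sign bit to match, so signed zeros are
// distinguished (NaN is excluded from the samples below).
static bool sameFP(double A, double B) {
  return A == B && std::signbit(A) == std::signbit(B);
}

int main() {
  const double Samples[] = {0.0, -0.0, 1.5, -2.25, 1e308};
  for (double X : Samples)
    std::printf("X=%+g  fadd(X,-0)==X:%d  fsub(X,0)==X:%d  fmul(X,1)==X:%d\n",
                X, sameFP(X + -0.0, X), sameFP(X - 0.0, X), sameFP(X * 1.0, X));
  // The caveat behind "fadd X, 0 ==> X, when we know X is not -0":
  std::printf("fadd(-0.0, +0.0) = %+g (not -0.0)\n", -0.0 + 0.0);
  return 0;
}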
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, @@ -951,6 +1033,26 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return 0; } +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { @@ -1364,9 +1466,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true)) return Op0; - if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) return Op1; } @@ -1591,9 +1693,48 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -static Constant *computePointerICmp(const DataLayout &TD, +// A significant optimization not implemented here is assuming that alloca +// addresses are not equal to incoming argument values. They don't *alias*, +// as we say, but that doesn't mean they aren't equal, so we take a +// conservative approach. +// +// This is inspired in part by C++11 5.10p1: +// "Two pointers of the same type compare equal if and only if they are both +// null, both point to the same function, or both represent the same +// address." +// +// This is pretty permissive. +// +// It's also partly due to C11 6.5.9p6: +// "Two pointers compare equal if and only if both are null pointers, both are +// pointers to the same object (including a pointer to an object and a +// subobject at its beginning) or function, both are pointers to one past the +// last element of the same array object, or one is a pointer to one past the +// end of one array object and the other is a pointer to the start of a +// different array object that happens to immediately follow the first array +// object in the address space.) +// +// C11's version is more restrictive, however there's no reason why an argument +// couldn't be a one-past-the-end value for a stack object in the caller and be +// equal to the beginning of a stack object in the callee. +// +// If the C and C++ standards are ever made sufficiently restrictive in this +// area, it may be possible to update LLVM's semantics accordingly and reinstate +// this optimization. +static Constant *computePointerICmp(const DataLayout *TD, + const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { + // First, skip past any trivial no-ops. + LHS = LHS->stripPointerCasts(); + RHS = RHS->stripPointerCasts(); + + // A non-null pointer is not equal to a null pointer. 
+ if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) && + (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: @@ -1616,19 +1757,83 @@ static Constant *computePointerICmp(const DataLayout &TD, break; } + // Strip off any constant offsets so that we can reason about them. + // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets + // here and compare base addresses like AliasAnalysis does, however there are + // numerous hazards. AliasAnalysis and its utilities rely on special rules + // governing loads and stores which don't apply to icmps. Also, AliasAnalysis + // doesn't need to guarantee pointer inequality when it says NoAlias. Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; - // If LHS and RHS are not related via constant offsets to the same base - // value, there is nothing we can do here. - if (LHS != RHS) - return 0; + // If LHS and RHS are related via constant offsets to the same base + // value, we can replace it with an icmp which just compares the offsets. + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + + // Various optimizations for (in)equality comparisons. + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + // Different non-empty allocations that exist at the same time have + // different addresses (if the program can tell). Global variables always + // exist, so they always exist during the lifetime of each other and all + // allocas. Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas. Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + // + // With all that, if the offsets are within the bounds of their allocations + // (and not one-past-the-end! so we can't use inbounds!), and their + // allocations aren't the same, the pointers are not equal. + // + // Note that it's not necessary to check for LHS being a global variable + // address, due to canonicalization and constant folding. 
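The check the patch adds next boils down to simple offset arithmetic: once both pointers are stripped down to distinct allocations, non-negative offsets that are strictly less than each object's size rule out equality. A standalone sketch of that decision, with made-up sizes and offsets standing in for the results of stripAndComputeConstantOffsets and getObjectSize:

#include <cstdint>
#include <cstdio>

// LHS and RHS are assumed to be distinct allocations (two allocas, or an
// alloca and a global). With offsets strictly inside each object, the two
// addresses cannot compare equal.
static bool provablyUnequal(uint64_t LHSOffset, uint64_t LHSSize,
                            uint64_t RHSOffset, uint64_t RHSSize) {
  return LHSOffset < LHSSize && RHSOffset < RHSSize;
}

int main() {
  // Offsets inside two 16-byte objects: icmp eq can fold to false.
  std::printf("inside/inside -> unequal: %d\n", provablyUnequal(4, 16, 8, 16));
  // One-past-the-end is not enough; the next object could start right there.
  std::printf("one-past-end  -> unequal: %d\n", provablyUnequal(16, 16, 0, 16));
  return 0;
}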
+ if (isa<AllocaInst>(LHS) && + (isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) { + ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset); + ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset); + uint64_t LHSSize, RHSSize; + if (LHSOffsetCI && RHSOffsetCI && + getObjectSize(LHS, LHSSize, TD, TLI) && + getObjectSize(RHS, RHSSize, TD, TLI)) { + const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); + const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); + if (!LHSOffsetValue.isNegative() && + !RHSOffsetValue.isNegative() && + LHSOffsetValue.ult(LHSSize) && + RHSOffsetValue.ult(RHSSize)) { + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } - return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + // Repeat the above check but this time without depending on DataLayout + // or being able to compute a precise size. + if (!cast<PointerType>(LHS->getType())->isEmptyTy() && + !cast<PointerType>(RHS->getType())->isEmptyTy() && + LHSOffset->isNullValue() && + RHSOffset->isNullValue()) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } + + // Otherwise, fail. + return 0; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -1693,62 +1898,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp <object*>, <object*/null> - Different identified objects have - // different addresses (unless null), and what's more the address of an - // identified local is never equal to another argument (again, barring null). - // Note that generalizing to the case where LHS is a global variable address - // or null is pointless, since if both LHS and RHS are constants then we - // already constant folded the compare, and if only one of them is then we - // moved it to RHS already. - Value *LHSPtr = LHS->stripPointerCasts(); - Value *RHSPtr = RHS->stripPointerCasts(); - if (LHSPtr == RHSPtr) - return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - // Be more aggressive about stripping pointer adjustments when checking a - // comparison of an alloca address to another object. We can rip off all - // inbounds GEP operations, even if they are variable. - LHSPtr = LHSPtr->stripInBoundsOffsets(); - if (llvm::isIdentifiedObject(LHSPtr)) { - RHSPtr = RHSPtr->stripInBoundsOffsets(); - if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) { - // If both sides are different identified objects, they aren't equal - // unless they're null. - if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - - // A local identified object (alloca or noalias call) can't equal any - // incoming argument, unless they're both null or they belong to - // different functions. The latter happens during inlining. - if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr)) - if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr)) - if (LHSInst->getParent()->getParent() == RHSArg->getParent() && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - } - - // Assume that the constant null is on the right. 
- if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); - } - } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) { - RHSPtr = RHSPtr->stripInBoundsOffsets(); - // An alloca can't be equal to an argument unless they come from separate - // functions via inlining. - if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) { - if (LHSArg->getParent() == RHSInst->getParent()->getParent()) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); - } - } - } - // If we are comparing with zero then try hard since this is a common case. if (match(RHS, m_Zero())) { bool LHSKnownNonNegative, LHSKnownNegative; @@ -2375,8 +2524,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Simplify comparisons of related pointers using a powerful, recursive // GEP-walk when we have target data available.. - if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) - if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + if (LHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS)) return C; if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) { @@ -2697,10 +2846,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); @@ -2768,14 +2925,88 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } -static Value *SimplifyCallInst(CallInst *CI, const Query &) { - // call undef -> undef - if (isa<UndefValue>(CI->getCalledValue())) - return UndefValue::get(CI->getType()); +static bool IsIdempotent(Intrinsic::ID ID) { + switch (ID) { + default: return false; + + // Unary idempotent: f(f(x)) = f(x) + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + return true; + } +} + +template <typename IterTy> +static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + // Perform idempotent optimizations + if (!IsIdempotent(IID)) + return 0; + + // Unary Ops + if (std::distance(ArgBegin, ArgEnd) == 1) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) + if (II->getIntrinsicID() == IID) + return II; return 0; } +template <typename IterTy> +static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + Type *Ty = V->getType(); + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) + Ty = PTy->getElementType(); + 
FunctionType *FTy = cast<FunctionType>(Ty); + + // call undef -> undef + if (isa<UndefValue>(V)) + return UndefValue::get(FTy->getReturnType()); + + Function *F = dyn_cast<Function>(V); + if (!F) + return 0; + + if (unsigned IID = F->getIntrinsicID()) + if (Value *Ret = + SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + return Ret; + + if (!canConstantFoldCallTo(F)) + return 0; + + SmallVector<Constant *, 4> ConstantArgs; + ConstantArgs.reserve(ArgEnd - ArgBegin); + for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { + Constant *C = dyn_cast<Constant>(*I); + if (!C) + return 0; + ConstantArgs.push_back(C); + } + + return ConstantFoldCall(F, ConstantArgs, Q.TLI); +} + +Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT), + RecursionLimit); +} + +Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT), + RecursionLimit); +} + /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, @@ -2787,18 +3018,30 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, default: Result = ConstantFoldInstruction(I, TD, TLI); break; + case Instruction::FAdd: + Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FSub: + Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FMul: + Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Mul: Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; @@ -2872,9 +3115,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, case Instruction::PHI: Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT)); break; - case Instruction::Call: - Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT)); + case Instruction::Call: { + CallSite CS(cast<CallInst>(I)); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + TD, TLI, DT); break; + } case Instruction::Trunc: Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp index ca9cdca..26a0322 100644 --- a/contrib/llvm/lib/Analysis/Interval.cpp +++ b/contrib/llvm/lib/Analysis/Interval.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Interval.h" -#include "llvm/BasicBlock.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/CFG.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> diff --git 
a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 2b87d80..66b5e85 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -13,23 +13,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lazy-value-info" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <map> #include <stack> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp index efb722b..fefa516 100644 --- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp index 81b0f46..0592ccb 100644 --- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp +++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" using namespace llvm; /// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. 
This diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 6d6d580..9393508 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -34,26 +34,26 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; namespace { @@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I, } // Check for buffer overflows and misalignment. - if (TD) { - // Only handles memory references that read/write something simple like an - // alloca instruction or a global variable. - int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { - // OK, so the access is to a constant offset from Ptr. Check that Ptr is - // something we can handle and if so extract the size of this base object - // along with its alignment. - uint64_t BaseSize = AliasAnalysis::UnknownSize; - unsigned BaseAlign = 0; - - if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { - Type *ATy = AI->getAllocatedType(); - if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); - } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { - // If the global may be defined differently in another compilation unit - // then don't warn about funky memory accesses. - if (GV->hasDefinitiveInitializer()) { - Type *GTy = GV->getType()->getElementType(); - if (GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); - } + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. 
+ uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); } - - // Accesses from before the start or after the end of the object are not - // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); - - // Accesses that say that the memory is more aligned than it is are not - // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. + if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 73aa8b4..0902a39 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -13,12 +13,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same @@ -48,48 +49,18 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to GetUnderlyingObject but is located -/// here to avoid making VMCore depend on DataLayout. 
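The Lint checks above reduce to two arithmetic tests on a (base, constant offset) decomposition: the accessed byte range must lie inside the object, and the claimed alignment must not exceed what the base alignment guarantees at that offset. A standalone restatement with invented sizes; minAlign here re-implements the lowest-set-bit idea behind llvm::MinAlign just for the sketch:

#include <cstdint>
#include <cstdio>

// Lowest set bit of (A | B): the alignment guaranteed at Base+Offset when the
// base is A-aligned.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t V = A | B;
  return V & (~V + 1);
}

// True if an access of Size bytes at Base+Offset stays inside the object.
static bool inBounds(int64_t Offset, uint64_t Size, uint64_t BaseSize) {
  return Offset >= 0 && (uint64_t)Offset + Size <= BaseSize;
}

int main() {
  // 4-byte load at offset 12 from a 16-byte, 8-byte-aligned alloca.
  std::printf("in bounds: %d\n", inBounds(12, 4, 16));   // 1
  std::printf("overflow : %d\n", inBounds(12, 8, 16));   // 0
  // Claiming 8-byte alignment at offset 12 of an 8-aligned base is a lie.
  std::printf("align ok : %d\n", 8 <= minAlign(8, 12));  // 0
  std::printf("align ok : %d\n", 4 <= minAlign(8, 12));  // 1
  return 0;
}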
-static Value *getUnderlyingObjectWithOffset(Value *V, const DataLayout *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - Indices); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast<Operator>(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const DataLayout *TD) { - uint64_t ByteOffset = 0; + int64_t ByteOffset = 0; Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); + + if (ByteOffset < 0) // out of bounds + return false; Type *BaseType = 0; unsigned BaseAlign = 0; @@ -97,10 +68,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // An alloca is safe to load from as load as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { + } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // Global variables are safe to load from but their size cannot be // guaranteed if they are overridden. - if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { + if (!GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 8341f9d..f1ad650 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -15,18 +15,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include <algorithm> using namespace llvm; @@ -213,14 +214,75 @@ bool Loop::isLoopSimplifyForm() const { /// isSafeToClone - Return true if the loop body is safe to clone in practice. /// Routines that reform the loop CFG and split edges often fail on indirectbr. bool Loop::isSafeToClone() const { - // Return false if any loop blocks contain indirectbrs. 
+ // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa<IndirectBrInst>((*I)->getTerminator())) + if (isa<IndirectBrInst>((*I)->getTerminator())) { + return false; + } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) { + if (II->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + + for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { + if (const CallInst *CI = dyn_cast<CallInst>(BI)) { + if (CI->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + } + } + return true; +} + +bool Loop::isAnnotatedParallel() const { + + BasicBlock *latch = getLoopLatch(); + if (latch == NULL) + return false; + + MDNode *desiredLoopIdMetadata = + latch->getTerminator()->getMetadata("llvm.loop.parallel"); + + if (!desiredLoopIdMetadata) return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + if (!II->getMetadata("llvm.mem.parallel_loop_access")) + return false; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). The loop identifier metadata refers to + // itself so we can check both cases with the same routine. + MDNode *loopIdMD = + dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access")); + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } } return true; } + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. 
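Loop::isAnnotatedParallel, added above, only trusts the llvm.loop.parallel annotation when every memory instruction in the body carries llvm.mem.parallel_loop_access metadata that names the same loop id as the latch branch. A standalone model of that containment test, using strings in place of MDNode operands (the names are invented for the example):

#include <cstdio>
#include <string>
#include <vector>

struct MemInst {
  bool HasParallelMD;                // has llvm.mem.parallel_loop_access at all
  std::vector<std::string> LoopIds;  // loop ids the metadata refers to
};

// Every memory instruction must name the loop id attached to the latch branch,
// otherwise some pass may have introduced a loop-carried dependence.
static bool isAnnotatedParallel(const std::string &LoopId,
                                const std::vector<MemInst> &MemInsts) {
  for (const MemInst &MI : MemInsts) {
    if (!MI.HasParallelMD)
      return false;
    bool Found = false;
    for (const std::string &Id : MI.LoopIds)
      if (Id == LoopId) { Found = true; break; }
    if (!Found)
      return false;
  }
  return true;
}

int main() {
  std::vector<MemInst> Body = {{true, {"loop.0"}}, {true, {"loop.0", "loop.1"}}};
  std::printf("parallel: %d\n", isAnnotatedParallel("loop.0", Body)); // 1
  Body.push_back({true, {"loop.1"}}); // a memory op tied only to an inner loop
  std::printf("parallel: %d\n", isAnnotatedParallel("loop.0", Body)); // 0
  return 0;
}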
bool Loop::hasDedicatedExits() const { diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index 8578a63..d26aaf1 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -10,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 0a539fe..d490d54 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -8,24 +8,24 @@ //===----------------------------------------------------------------------===// // // This family of functions identifies calls to builtin functions that allocate -// or free memory. +// or free memory. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "memory-builtins" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { + // Skip intrinsics + if (isa<IntrinsicInst>(V)) + return 0; + Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; @@ -132,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); - return CS && CS.hasFnAttr(Attributes::NoAlias); + return CS && CS.hasFnAttr(Attribute::NoAlias); } @@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return NULL; + return 0; // The size of the malloc's result type must be known to determine array size. 
Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) - return NULL; + return 0; unsigned ElementSize = TD->getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) @@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = NULL; + Value *Multiple = 0; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return NULL; + return 0; } -/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. const CallInst *llvm::isArrayMalloc(const Value *I, @@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const CallInst *CI = extractMallocCall(I, TLI); Value *ArraySize = computeArraySize(CI, TD, TLI); - if (ArraySize && - ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - return CI; + if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize)) + if (ConstSize->isOne()) + return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return NULL; + return 0; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I, PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = NULL; + + PointerType *MallocType = 0; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast<PointerType>(CI->getType()); // Type could not be determined. - return NULL; + return 0; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : NULL; + return PT ? PT->getElementType() : 0; } -/// getMallocArraySize - Returns the array size of a malloc call. If the +/// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be @@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); - if (!CI) + if (!CI || isa<IntrinsicInst>(CI)) return 0; Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration()) @@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return 0; // Check free prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. 
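computeArraySize, shown above, treats a malloc as an array allocation only when its size argument is a known multiple of the allocated type's alloc size; getMallocArraySize then reports that multiple. A standalone sketch of the constant-argument case (ComputeMultiple in the real code handles more value forms than a plain constant):

#include <cstdint>
#include <cstdio>

// If MallocArg is a constant multiple of ElementSize, return that multiple
// through NumElems; otherwise report failure.
static bool computeArraySize(uint64_t MallocArg, uint64_t ElementSize,
                             uint64_t &NumElems) {
  if (ElementSize == 0 || MallocArg % ElementSize != 0)
    return false;
  NumElems = MallocArg / ElementSize;
  return true;
}

int main() {
  uint64_t N;
  // malloc(48) of a type whose alloc size is 12 -> a 4-element array.
  if (computeArraySize(48, 12, N))
    std::printf("array of %llu elements\n", (unsigned long long)N);
  // malloc(50) of the same type is not a whole multiple.
  std::printf("array? %d\n", computeArraySize(50, 12, N));
  return 0;
}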
FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) @@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } +/// \brief Compute the size of the underlying object pointed by Ptr. Returns +/// true and the object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); + if (!Visitor.knownSize(Data)) + return false; + + Size = Data.first.getZExtValue(); + return true; +} + STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -385,20 +409,29 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); - if (Instruction *I = dyn_cast<Instruction>(V)) { - // If we have already seen this instruction, bail out. Cycles can happen in - // unreachable code after constant propagation. - if (!SeenInsts.insert(I)) - return unknown(); + if (isa<Instruction>(V) || isa<GEPOperator>(V)) { + // Return cached value or insert unknown in cache if size of V was not + // computed yet in order to avoid recursions in PHis. + std::pair<CacheMapTy::iterator, bool> CacheVal = + CacheMap.insert(std::make_pair(V, unknown())); + if (!CacheVal.second) + return CacheVal.first->second; + + SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - return visitGEPOperator(*GEP); - return visit(*I); + Result = visitGEPOperator(*GEP); + else + Result = visit(cast<Instruction>(*V)); + return CacheMap[V] = Result; } + if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) return visitConstantPointerNull(*P); + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return visitGlobalAlias(*GA); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast<UndefValue>(V)) @@ -406,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless - if (CE->getOpcode() == Instruction::GetElementPtr) - return visitGEPOperator(cast<GEPOperator>(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -510,14 +541,19 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) + APInt Offset(IntTyBits, 0); + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset)) return unknown(); - SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end()); - APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops)); return std::make_pair(PtrData.first, PtrData.second + Offset); } +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { + if (GA.mayBeOverridden()) + return unknown(); + return compute(GA.getAliasee()); +} + SizeOffsetType 
ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); @@ -536,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { - // too complex to analyze statically. - return unknown(); +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { + if (PHI.getNumIncomingValues() == 0) + return unknown(); + + SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); + if (!bothKnown(Ret)) + return unknown(); + + // Verify that all PHI incoming pointers have the same size and offset. + for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { + SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); + if (!bothKnown(EdgeData) || EdgeData != Ret) + return unknown(); + } + return Ret; } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { @@ -619,6 +667,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { } else if (isa<Argument>(V) || (isa<ConstantExpr>(V) && cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) || + isa<GlobalAlias>(V) || isa<GlobalVariable>(V)) { // ignore values where we cannot do more than what ObjectSizeVisitor can Result = unknown(); diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 9872890..2240e9d 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements an analysis that determines, for a given memory -// operation, what preceding memory operations it depends on. It builds on +// operation, what preceding memory operations it depends on. It builds on // alias analysis information, and tries to provide a lazy, caching interface to // a common kind of alias information query. // @@ -16,21 +16,21 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/PredIteratorCache.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/PredIteratorCache.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, static const int BlockScanLimit = 500; char MemoryDependenceAnalysis::ID = 0; - + // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { /// RemoveFromReverseMap - This is a helper function that removes Val from /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. 
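The new visitPHINode only reports a size for a PHI when every incoming pointer resolves to the same known size and offset; the CacheMap seeding in compute() above is what keeps cyclic PHIs from recursing forever. A standalone model of the agreement check, with plain (size, offset) pairs standing in for SizeOffsetType:

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

typedef std::pair<int64_t, int64_t> SizeOffset;  // (size, offset)
static const SizeOffset Unknown(-1, -1);

static bool known(const SizeOffset &SO) { return SO != Unknown; }

// All incoming values must have a known and identical size/offset, otherwise
// the PHI's size is unknown.
static SizeOffset visitPHI(const std::vector<SizeOffset> &Incoming) {
  if (Incoming.empty() || !known(Incoming[0]))
    return Unknown;
  for (size_t i = 1, e = Incoming.size(); i != e; ++i)
    if (!known(Incoming[i]) || Incoming[i] != Incoming[0])
      return Unknown;
  return Incoming[0];
}

int main() {
  SizeOffset R1 = visitPHI({{64, 0}, {64, 0}});  // all agree -> known
  SizeOffset R2 = visitPHI({{64, 0}, {64, 8}});  // offsets differ -> unknown
  std::printf("R1 known: %d  R2 known: %d\n", known(R1), known(R2));
  return 0;
}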
template <typename KeyTy> -static void RemoveFromReverseMap(DenseMap<Instruction*, +static void RemoveFromReverseMap(DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> > &ReverseMap, Instruction *Inst, KeyTy Val) { typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator @@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (LI->isUnordered()) { Loc = AA->getLocation(LI); return AliasAnalysis::Ref; - } else if (LI->getOrdering() == Monotonic) { + } + if (LI->getOrdering() == Monotonic) { Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } @@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (SI->isUnordered()) { Loc = AA->getLocation(SI); return AliasAnalysis::Mod; - } else if (SI->getOrdering() == Monotonic) { + } + if (SI->getOrdering() == Monotonic) { Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } @@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic - // running time on extreme testcases. + // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); Instruction *Inst = --ScanIt; - + // If this inst is a memory op, get the pointer it accessed AliasAnalysis::Location Loc; AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); @@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. -static bool +static bool isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, @@ -262,7 +264,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, // If we haven't already computed the base/offset of MemLoc, do so now. if (MemLocBase == 0) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, @@ -283,25 +285,31 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, const DataLayout &TD) { // We can only extend simple integer loads. if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; - + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) + return 0; + // Get the base of this load. int64_t LIOffs = 0; - const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); - + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); + // If the two pointers are not based on the same pointer, we can't tell that // they are related. if (LIBase != MemLocBase) return 0; - + // Okay, the two values are based on the same pointer, but returned as // no-alias. This happens when we have things like two byte loads at "P+1" // and "P+3". Check to see if increasing the size of the "LI" load up to its // alignment (or the largest native integer type) will allow us to load all // the bits required by MemLoc. 
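GetLocation, near the top of this hunk, maps an instruction onto the effect it has at its location: unordered loads only read (Ref), unordered stores only write (Mod), and monotonic atomics are conservatively treated as both (ModRef). A standalone restatement of that mapping; the handling of stronger orderings here is a simplification of what the real function does:

#include <cstdio>

enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };
enum Ordering { Unordered, Monotonic, Stronger };

// Unordered accesses keep their natural read/write effect; monotonic ones are
// conservatively ModRef; stronger orderings are also ModRef in this sketch.
static ModRefResult classify(bool IsStore, Ordering Ord) {
  if (Ord == Unordered)
    return IsStore ? Mod : Ref;
  return ModRef;
}

int main() {
  std::printf("unordered load : %d\n", classify(false, Unordered)); // Ref = 1
  std::printf("monotonic load : %d\n", classify(false, Monotonic)); // ModRef = 3
  std::printf("unordered store: %d\n", classify(true, Unordered));  // Mod = 2
  return 0;
}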
- + // If MemLoc is before LI, then no widening of LI will help us out. if (MemLocOffs < LIOffs) return 0; - + // Get the alignment of the load in bytes. We assume that it is safe to load // any legal integer up to this size without a problem. For example, if we're // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can @@ -310,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned LoadAlign = LI->getAlignment(); int64_t MemLocEnd = MemLocOffs+MemLocSize; - + // If no amount of rounding up will let MemLoc fit into LI, then bail out. if (LIOffs+LoadAlign < MemLocEnd) return 0; - + // This is the size of the load to try. Start with the next larger power of // two. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - + while (1) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. @@ -327,8 +335,8 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->getFnAttributes(). - hasAttribute(Attributes::AddressSafety)) + LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. @@ -337,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; - + NewLoadByteSize <<= 1; } } @@ -345,15 +353,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases -/// with reads from read-only locations. +/// with reads from read-only locations. If possible, pass the query +/// instruction as well; this function may take advantage of the metadata +/// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, - BasicBlock::iterator ScanIt, BasicBlock *BB) { +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB, + Instruction *QueryInst) { const Value *MemLocBase = 0; int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; + bool isInvariantLoad = false; + if (isLoad && QueryInst) { + LoadInst *LI = dyn_cast<LoadInst>(QueryInst); + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + isInvariantLoad = true; + } // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -368,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa<DbgInfoIntrinsic>(II)) continue; - + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. 
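getLoadLoadClobberFullWidthSize widens a small load by doubling its byte size until it also covers the queried range, giving up once the widened load would exceed the load's alignment or the largest native integer. A standalone version of that search; 8 bytes is assumed as the native-integer limit for the example:

#include <cstdint>
#include <cstdio>

static uint64_t nextPowerOf2(uint64_t V) {
  uint64_t P = 1;
  while (P <= V) P <<= 1;
  return P;
}

// Returns the widened load size in bytes, or 0 if no widening can cover
// [MemLocOffs, MemLocOffs+MemLocSize) without over-reading past the alignment.
static unsigned widenedLoadSize(int64_t LIOffs, unsigned LoadByteSize,
                                unsigned LoadAlign, int64_t MemLocOffs,
                                uint64_t MemLocSize) {
  if (MemLocOffs < LIOffs)                      // MemLoc is before the load
    return 0;
  int64_t MemLocEnd = MemLocOffs + (int64_t)MemLocSize;
  if (LIOffs + (int64_t)LoadAlign < MemLocEnd)  // no rounding up can reach it
    return 0;
  unsigned NewSize = (unsigned)nextPowerOf2(LoadByteSize);
  for (;;) {
    if (NewSize > LoadAlign || NewSize > 8)     // 8 = native limit assumed here
      return 0;
    if (LIOffs + (int64_t)NewSize >= MemLocEnd)
      return NewSize;
    NewSize <<= 1;
  }
}

int main() {
  // i8 load at offset 0 of an 8-byte-aligned pointer; query covers byte 3.
  std::printf("widen to %u bytes\n", widenedLoadSize(0, 1, 8, 3, 1)); // 4
  // Same query, but the load is only 2-byte aligned: widening would over-read.
  std::printf("widen to %u bytes\n", widenedLoadSize(0, 1, 2, 3, 1)); // 0
  return 0;
}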
if (II->getIntrinsicID() == Intrinsic::lifetime_start) { @@ -392,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - + if (isLoad) { if (R == AliasAnalysis::NoAlias) { // If this is an over-aligned integer load (for example, @@ -409,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) return MemDepResult::getClobber(Inst); - + continue; } - + // Must aliased loads are defs of each other. if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); @@ -427,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (R == AliasAnalysis::PartialAlias) return MemDepResult::getClobber(Inst); #endif - + // Random may-alias loads don't depend on each other without a // dependence. continue; @@ -444,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Atomic stores have complications involved. // FIXME: This is overly conservative. @@ -460,14 +476,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. AliasAnalysis::Location StoreLoc = AA->getLocation(SI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); - + if (R == AliasAnalysis::NoAlias) continue; if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; return MemDepResult::getClobber(Inst); } @@ -482,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); // Be conservative if the accessed pointer may alias the allocation. @@ -516,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(Inst); } } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -528,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, /// depends. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; - + // Check for a cached result MemDepResult &LocalCache = LocalDeps[QueryInst]; - + // If the cached entry is non-dirty, just return it. Note that this depends // on MemDepResult's default constructing to 'dirty'. if (!LocalCache.isDirty()) return LocalCache; - + // Otherwise, if we have a dirty entry, we know we can start the scan at that // instruction, which may save us some work. 
if (Instruction *Inst = LocalCache.getInst()) { ScanPos = Inst; - + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); } - + BasicBlock *QueryParent = QueryInst->getParent(); - + // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is @@ -565,7 +583,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent); + QueryParent, QueryInst); } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); @@ -575,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); } - + // Remember the result! if (Instruction *I = LocalCache.getInst()) ReverseLocalDeps[I].insert(QueryInst); - + return LocalCache; } @@ -620,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// the uncached case, this starts out as the set of predecessors we care /// about. SmallVector<BasicBlock*, 32> DirtyBlocks; - + if (!Cache.empty()) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. @@ -628,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { ++NumCacheNonLocal; return Cache; } - + // If we already have a partially computed set of results, scan them to // determine what is dirty, seeding our initial DirtyBlocks worklist. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) if (I->getResult().isDirty()) DirtyBlocks.push_back(I->getBB()); - + // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); - + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; @@ -649,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(*PI); ++NumUncacheNonLocal; } - + // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); SmallPtrSet<BasicBlock*, 64> Visited; - + unsigned NumSortedEntries = Cache.size(); DEBUG(AssertSorted(Cache)); - + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - + // Already processed this block? if (!Visited.insert(DirtyBB)) continue; - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. DEBUG(AssertSorted(Cache, NumSortedEntries)); - NonLocalDepInfo::iterator Entry = + NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; - if (Entry != Cache.begin()+NumSortedEntries && + if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. if (!Entry->getResult().isDirty()) continue; - + // Otherwise, remember this slot so we can update the value. 
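The dirty-block scan above keeps the cache only partially sorted and uses std::upper_bound over the sorted prefix, stepping back one slot to find an existing entry for the block. A simplified version of that lookup, assuming integer block ids and results in place of the real entry type:

#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

// A cache entry pairs a block id with its (possibly dirty) result. Only the
// first NumSorted entries are guaranteed to be sorted by block id.
using Entry = std::pair<int /*BlockID*/, int /*Result*/>;

Entry *findExistingEntry(std::vector<Entry> &Cache, unsigned NumSorted,
                         int BlockID) {
  auto First = Cache.begin(), Last = Cache.begin() + NumSorted;
  // upper_bound yields the first sorted entry whose key is greater than
  // BlockID, so the entry for BlockID, if any, sits immediately before it.
  auto It = std::upper_bound(First, Last, Entry(BlockID, 0),
                             [](const Entry &A, const Entry &B) {
                               return A.first < B.first;
                             });
  if (It != First && std::prev(It)->first == BlockID)
    return &*std::prev(It);
  return nullptr;  // Not cached yet; the caller appends a new, unsorted entry.
}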
ExistingResult = &*Entry; } - + // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); @@ -699,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { QueryCS.getInstruction()); } } - + // Find out if this block has a local dependency for QueryInst. MemDepResult Dep; - + if (ScanPos != DirtyBB->begin()) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { @@ -712,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { Dep = MemDepResult::getNonFuncLocal(); } - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! if (!Dep.isNonLocal()) { @@ -728,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); } else { - + // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) DirtyBlocks.push_back(*PI); } } - + return Cache; } @@ -753,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); - + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI @@ -778,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis:: GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = @@ -786,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, NonLocalDepEntry(BB)); if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; - + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; return ExistingResult->getResult(); - } - + } + // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. @@ -807,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; ScanPos = ExistingResult->getResult().getInst(); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } - + // Scan the block for the dependency. 
MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache->push_back(NonLocalDepEntry(BB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! if (!Dep.isDef() && !Dep.isClobber()) return Dep; - + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); @@ -843,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. -static void +static void SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { switch (Cache.size() - NumSortedEntries) { @@ -895,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { - + // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -909,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. - std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); NonLocalPointerInfo *CacheInfo = &Pair.first->second; @@ -971,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); if (VI == Visited.end() || VI->second == Pointer.getAddr()) continue; - + // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a // non-fully cached query, but there is little point in doing this. return true; } } - + Value *Addr = Pointer.getAddr(); for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { @@ -989,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, ++NumCacheCompleteNonLocalPtr; return false; } - + // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, @@ -998,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else CacheInfo->Pair = BBSkipFirstBlockPair(); - + SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); - + // PredList used inside loop. SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; @@ -1012,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); DEBUG(AssertSorted(*Cache)); - + while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. 
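SortNonLocalDepInfoCache special-cases the common situation where only one or two entries were appended after the sorted prefix, placing them directly instead of re-sorting everything. One way to express that idea on a plain vector; std::rotate here merely stands in for the insertion the real code performs:

#include <algorithm>
#include <vector>

// Restore sortedness of Cache when only a handful of entries were appended
// after the already-sorted prefix of length NumSorted.
void resortCache(std::vector<int> &Cache, unsigned NumSorted) {
  switch (Cache.size() - NumSorted) {
  case 0:
    break;                                   // Already fully sorted.
  case 1:
  case 2:
    // One or two stragglers: slide each into place, which is cheaper than
    // re-sorting the whole vector.
    for (unsigned I = NumSorted, E = Cache.size(); I != E; ++I) {
      auto Pos = std::upper_bound(Cache.begin(), Cache.begin() + I, Cache[I]);
      std::rotate(Pos, Cache.begin() + I, Cache.begin() + I + 1);
    }
    break;
  default:
    std::sort(Cache.begin(), Cache.end());   // Too many: sort everything.
  }
}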
See if we already have @@ -1027,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); - + // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } } - + // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. // If not, we just add the predecessors to the worklist and scan them with @@ -1051,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NewBlocks.push_back(*PI); continue; } - + // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. @@ -1066,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } - + // We do need to do phi translation, if we know ahead of time we can't phi // translate this value, don't even try. if (!Pointer.IsPotentiallyPHITranslatable()) goto PredTranslationFailure; - + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. // Sort it now (if needed) so that recursive invocations of @@ -1094,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); - + // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur // with PHI translation when a critical edge exists and the PHI node in @@ -1111,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; - + // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0; i < PredList.size(); i++) + for (unsigned i = 0, n = PredList.size(); i < n; ++i) Visited.erase(PredList[i].first); goto PredTranslationFailure; @@ -1127,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Actually process results here; this need to be a separate loop to avoid // calling getNonLocalPointerDepFromBB for blocks we don't want to return - // any results for. (getNonLocalPointerDepFromBB will modify our + // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) - for (unsigned i = 0; i < PredList.size(); i++) { + for (unsigned i = 0, n = PredList.size(); i < n; ++i) { BasicBlock *Pred = PredList[i].first; PHITransAddr &PredPointer = PredList[i].second; Value *PredPtrVal = PredPointer.getAddr(); @@ -1170,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; } } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. 
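During phi translation the Visited map records, per predecessor block, which translated pointer the query used; revisiting a block with the same pointer is harmless, while a different pointer forces the translation-failure path. A small sketch of that three-way check, using an integer block id and a string in place of the real pointer value:

#include <map>
#include <string>

enum VisitOutcome { NotSeen, SeenSamePointer, Conflict };

// Record that Block is being queried with pointer Ptr; detect a conflicting
// earlier query of the same block with a different pointer.
VisitOutcome recordVisit(std::map<int, std::string> &Visited, int Block,
                         const std::string &Ptr) {
  auto Ins = Visited.insert({Block, Ptr});
  if (Ins.second)
    return NotSeen;            // First time this block is reached.
  if (Ins.first->second == Ptr)
    return SeenSamePointer;    // Same translated pointer: nothing new to do.
  return Conflict;             // Different pointer: treat as translation failure.
}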
CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -1188,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // The following code is "failure"; we can't produce a sane translation // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - + if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); } - + // Since we failed phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole @@ -1209,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // we have to bail out. if (SkipFirstBlock) return true; - + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); if (I->getBB() != BB) continue; - + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); @@ -1234,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, /// CachedNonLocalPointerInfo, remove it. void MemoryDependenceAnalysis:: RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { - CachedNonLocalPointerInfo::iterator It = + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; - + // Remove all of the entries in the BB->val map. This involves removing // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); } - + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). NonLocalPointerDeps.erase(It); } @@ -1305,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove this local dependency info. LocalDeps.erase(LocalDepEntry); } - + // If we have any cached pointer dependencies on this instruction, remove // them. If the instruction has non-pointer type, then it can't be a pointer // base. - + // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } - + // Loop over all of the things that depend on the instruction we're removing. 
- // + // SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1330,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); - + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && "Nothing can locally depend on a terminator"); - + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), E = ReverseDeps.end(); I != E; ++I) { Instruction *InstDependingOnRemInst = *I; assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); - + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; - + // Make sure to remember that new things depend on NewDepInst. assert(NewDirtyVal.getInst() && "There is no way something else can have " "a local dep on this if it is a terminator!"); - ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); } - + ReverseLocalDeps.erase(ReverseDepIt); // Add new reverse deps after scanning the set, to avoid invalidating the @@ -1363,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); - + PerInstNLInfo &INLD = NonLocalDeps[*I]; // The information is now dirty! INLD.second = true; - - for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); } @@ -1396,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a // value in the NonLocalPointerDeps info. ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = @@ -1404,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; - + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { ValueIsLoadPair P = *I; assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); - + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; - + // The cache is not valid for any specific block anymore. NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); - + // Update any entries for RemInst to use the instruction after it. 
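removeInstruction redirects everything that locally depended on the removed instruction to a dirty entry naming the next instruction, and it defers the reverse-map insertions until after the walk so the set being iterated is never mutated. A rough standalone sketch of that pattern with integer instruction ids; the map shapes are simplifications, not the real data structures:

#include <map>
#include <set>
#include <utility>
#include <vector>

// LocalDeps maps an instruction id to the instruction it depends on;
// ReverseDeps maps an instruction id to the set of its dependents.
void redirectDependents(std::map<int, int> &LocalDeps,
                        std::map<int, std::set<int>> &ReverseDeps,
                        int Removed, int NextAfterRemoved) {
  auto It = ReverseDeps.find(Removed);
  if (It == ReverseDeps.end())
    return;
  // Collect the new reverse edges first and apply them after the walk, so the
  // set being iterated is never mutated mid-loop.
  std::vector<std::pair<int, int>> ToAdd;
  for (int Dependent : It->second) {
    LocalDeps[Dependent] = NextAfterRemoved;  // Now a "dirty" dep on the successor.
    ToAdd.push_back({NextAfterRemoved, Dependent});
  }
  ReverseDeps.erase(It);
  for (const auto &P : ToAdd)
    ReverseDeps[P.first].insert(P.second);
}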
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); } - + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. std::sort(NLPDI.begin(), NLPDI.end()); } - + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); - + while (!ReversePtrDepsToAdd.empty()) { ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] .insert(ReversePtrDepsToAdd.back().second); ReversePtrDepsToAdd.pop_back(); } } - - + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); @@ -1456,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { assert(I->second.getInst() != D && "Inst occurs in data structures"); } - + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); @@ -1465,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } - + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1474,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = INLD.first.end(); II != EE; ++II) assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1482,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { @@ -1491,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseNonLocalPtrDepTy::const_iterator I = ReverseNonLocalPtrDeps.begin(), E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), E = I->second.end(); II != E; ++II) assert(*II != ValueIsLoadPair(D, false) && *II != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - + } diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp index f8c7514..0341537 100644 --- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -16,13 +16,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include 
"llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp index 2eb4137..907e9621 100644 --- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Pass.h" -#include "llvm/DataLayout.h" using namespace llvm; namespace { diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp index c35737e..e6af066 100644 --- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp index d4ad726..30d213b 100644 --- a/contrib/llvm/lib/Analysis/PathNumbering.cpp +++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp @@ -25,24 +25,23 @@ #define DEBUG_TYPE "ball-larus-numbering" #include "llvm/Analysis/PathNumbering.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InstrTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" #include "llvm/Pass.h" -#include "llvm/TypeBuilder.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include <queue> +#include <sstream> #include <stack> #include <string> #include <utility> -#include <sstream> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp index b361d3f..bc53221 100644 --- a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp +++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-info" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/PathProfileInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include <cstdio> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp index 0fcdfe7..48d7d05 100644 --- a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp +++ 
b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-verifier" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfoTypes.h" #include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include <stdio.h> using namespace llvm; @@ -85,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - arrayMap[0][F->begin()][0] = i++; + arrayMap[(BasicBlock*)0][F->begin()][0] = i++; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -126,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { << currentPath->getCount() << "\n"); // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); for( ProfilePathEdgeIterator nextEdge = pev->begin(), diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp index 6ed2729..96804a0 100644 --- a/contrib/llvm/lib/Analysis/PostDominators.cpp +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "postdomtree" #include "llvm/Analysis/PostDominators.h" -#include "llvm/Instructions.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp index a4f634a..d7f444b 100644 --- a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp +++ b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Module.h" -#include "llvm/InstrTypes.h" -#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include <cstdio> diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp index c43cff0..2ee0093 100644 --- a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp @@ -15,22 +15,22 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-metadata-loader" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/BasicBlock.h" 
-#include "llvm/InstrTypes.h" -#include "llvm/Module.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumEdgesRead, "The # of edges read."); diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp index 12b59e0..b284b99 100644 --- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp @@ -12,14 +12,14 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-estimator" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<double> diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp index b5b7ac1..9626a48 100644 --- a/contrib/llvm/lib/Analysis/ProfileInfo.cpp +++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp @@ -12,16 +12,16 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-info" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/SmallSet.h" -#include <set> -#include <queue> #include <limits> +#include <queue> +#include <set> using namespace llvm; namespace llvm { @@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>:: succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); if (Succ == End) { - P[0] = BB; + P[(const BasicBlock*)0] = BB; if (Mode & GetPathToExit) { hasFoundPath = true; BB = 0; @@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Succ != End; ++Succ) { Path P; GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[0]) { + if (Dest && Dest != P[(const BasicBlock*)0]) { AllEdgesHaveSameReturn = false; } - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; } if (AllEdgesHaveSameReturn) { if(EstimateMissingEdges(BB)) { @@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Path P; const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; if (!Dest) continue; if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp 
b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp index 5c7c97c..f1f3e940 100644 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp @@ -14,8 +14,8 @@ #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/Module.h" -#include "llvm/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include <cstdio> #include <cstdlib> diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp index 5ecf052..346f8d6 100644 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -12,20 +12,20 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-loader" -#include "llvm/BasicBlock.h" -#include "llvm/InstrTypes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" #include <set> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp index 0cb1588..c8896de 100644 --- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp +++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-verifier" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Debug.h" #include <set> using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp new file mode 100644 index 0000000..0a342b2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PtrUseVisitor.cpp @@ -0,0 +1,36 @@ +//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// Implementation of the pointer use visitors. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PtrUseVisitor.h" + +using namespace llvm; + +void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); + UI != UE; ++UI) { + if (VisitedUses.insert(&UI.getUse())) { + UseToVisit NewU = { + UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown), + Offset + }; + Worklist.push_back(llvm_move(NewU)); + } + } +} + +bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) { + if (!IsOffsetKnown) + return false; + + return GEPI.accumulateConstantOffset(DL, Offset); +} diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 30f0d2f..fad5074 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" - #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionIterator.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #define DEBUG_TYPE "region" #include "llvm/Support/Debug.h" diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp index 8b23cc7..c5f1b92 100644 --- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -9,16 +9,16 @@ // Print out the region tree of a function using dotty/graphviz. 
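enqueueUsers above is the usual worklist-with-visited-set walk over a value's uses, queueing each use only the first time it is seen. A generic sketch of the same traversal on a plain user graph; Node and its Users list are invented stand-ins for the LLVM use lists:

#include <set>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Users;  // Nodes that use this one.
};

// Visit the transitive users of Root exactly once, worklist style.
std::vector<Node *> collectTransitiveUsers(Node &Root) {
  std::vector<Node *> Worklist, Order;
  std::set<Node *> Seen;
  for (Node *U : Root.Users)
    if (Seen.insert(U).second)      // Enqueue each direct user once.
      Worklist.push_back(U);
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    Order.push_back(N);
    for (Node *U : N->Users)        // Enqueue users we have not seen yet.
      if (Seen.insert(U).second)
        Worklist.push_back(U);
  }
  return Order;
}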
//===----------------------------------------------------------------------===// +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPrinter.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/DOTGraphTraitsPass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index e3189ec..6ea915f 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -59,22 +59,25 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "scalar-evolution" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -83,9 +86,7 @@ #include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetLibraryInfo.h" #include <algorithm> using namespace llvm; @@ -4229,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { return Max ? Max : SE->getCouldNotCompute(); } +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. 
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -6120,8 +6140,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, getTypeSizeInBits(ICI->getOperand(0)->getType())) return false; - // Now that we found a conditional branch that dominates the loop, check to - // see if it is the comparison we are looking for. + // Now that we found a conditional branch that dominates the loop or controls + // the loop latch. Check to see if it is the comparison we are looking for. ICmpInst::Predicate FoundPred; if (Inverse) FoundPred = ICI->getInversePredicate(); @@ -6939,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); + + for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + BackedgeTakenCounts.erase(I++); + } + else + ++I; + } } typedef DenseMap<const Loop *, std::string> VerifyMap; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index e9edb3e..79c5f0d 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,9 +19,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 111bfb4..fcd7ce2 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) { } // Check to see if we already expanded this here. - std::map<std::pair<const SCEV *, Instruction *>, - AssertingVH<Value> >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; @@ -1600,14 +1599,14 @@ static bool width_descending(Value *lhs, Value *rhs) { /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl<WeakVH> &DeadInsts, - const TargetLowering *TLI) { + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. 
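The new loop in forgetMemoizedResults erases map entries while iterating, using the erase(I++) idiom so that removing the current element never invalidates the iterator being advanced. A minimal illustration of that idiom on a std::map, with placeholder key and value types:

#include <map>
#include <string>

// Drop every cache entry whose value mentions Key. erase(I++) removes the
// current element while keeping the traversal iterator valid.
void dropEntriesMentioning(std::map<int, std::string> &Cache,
                           const std::string &Key) {
  for (auto I = Cache.begin(), E = Cache.end(); I != E;) {
    if (I->second.find(Key) != std::string::npos)
      Cache.erase(I++);  // Erase the current entry, then continue the walk.
    else
      ++I;
  }
}

From C++11 onward, I = Cache.erase(I); is an equivalent spelling, since map::erase returns the iterator following the erased element.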
SmallVector<PHINode*, 8> Phis; for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast<PHINode>(I); ++I) { Phis.push_back(Phi); } - if (TLI) + if (TTI) std::sort(Phis.begin(), Phis.end(), width_descending); unsigned NumElim = 0; @@ -1635,8 +1634,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TLI - && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. const SCEV *TruncExpr = diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp index c819666..15b7872 100644 --- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "sparseprop" #include "llvm/Analysis/SparsePropagation.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp new file mode 100644 index 0000000..64f8e96 --- /dev/null +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -0,0 +1,558 @@ +//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tti" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +// Setup the analysis group to manage the TargetTransformInfo passes. +INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) +char TargetTransformInfo::ID = 0; + +TargetTransformInfo::~TargetTransformInfo() { +} + +void TargetTransformInfo::pushTTIStack(Pass *P) { + TopTTI = this; + PrevTTI = &P->getAnalysis<TargetTransformInfo>(); + + // Walk up the chain and update the top TTI pointer. + for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = this; +} + +void TargetTransformInfo::popTTIStack() { + TopTTI = 0; + + // Walk up the chain and update the top TTI pointer. 
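The TargetTransformInfo passes above form a delegation chain: each layer keeps the previously registered implementation in PrevTTI and forwards any query it does not handle, while TopTTI is kept pointing at the most derived layer so recursive cost queries re-enter at the top. A cut-down sketch of that chain-of-responsibility shape, with an invented CostModel interface instead of the real pass machinery:

#include <iostream>

// Each layer answers the queries it overrides and forwards the rest to the
// layer registered before it; Top always names the most derived layer.
struct CostModel {
  CostModel *Prev = nullptr;
  CostModel *Top = nullptr;

  virtual ~CostModel() {}
  virtual unsigned getAddCost() { return Prev ? Prev->getAddCost() : 1; }
  virtual unsigned getMulCost() { return Prev ? Prev->getMulCost() : 1; }

  // Install this layer on top of Below and repoint every layer's Top at it,
  // the same bookkeeping pushTTIStack performs on the PrevTTI chain.
  void push(CostModel *Below) {
    Prev = Below;
    Top = this;
    for (CostModel *P = Prev; P; P = P->Prev)
      P->Top = this;
  }
};

// A target layer that only overrides the multiply cost.
struct MyTargetCosts : CostModel {
  unsigned getMulCost() override { return 4; }
};

int main() {
  CostModel Fallback;        // Plays the role of the NoTTI base layer.
  MyTargetCosts Target;
  Target.push(&Fallback);
  std::cout << Target.getAddCost() << ' ' << Target.getMulCost() << '\n';  // 1 4
}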
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = PrevTTI; + + PrevTTI = 0; +} + +void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); +} + +unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + return PrevTTI->getOperationCost(Opcode, Ty, OpTy); +} + +unsigned TargetTransformInfo::getGEPCost( + const Value *Ptr, ArrayRef<const Value *> Operands) const { + return PrevTTI->getGEPCost(Ptr, Operands); +} + +unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, + int NumArgs) const { + return PrevTTI->getCallCost(FTy, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost(const Function *F, + int NumArgs) const { + return PrevTTI->getCallCost(F, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost( + const Function *F, ArrayRef<const Value *> Arguments) const { + return PrevTTI->getCallCost(F, Arguments); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +} + +unsigned TargetTransformInfo::getUserCost(const User *U) const { + return PrevTTI->getUserCost(U); +} + +bool TargetTransformInfo::isLoweredToCall(const Function *F) const { + return PrevTTI->isLoweredToCall(F); +} + +bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { + return PrevTTI->isLegalAddImmediate(Imm); +} + +bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { + return PrevTTI->isLegalICmpImmediate(Imm); +} + +bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); +} + +bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { + return PrevTTI->isTruncateFree(Ty1, Ty2); +} + +bool TargetTransformInfo::isTypeLegal(Type *Ty) const { + return PrevTTI->isTypeLegal(Ty); +} + +unsigned TargetTransformInfo::getJumpBufAlignment() const { + return PrevTTI->getJumpBufAlignment(); +} + +unsigned TargetTransformInfo::getJumpBufSize() const { + return PrevTTI->getJumpBufSize(); +} + +bool TargetTransformInfo::shouldBuildLookupTables() const { + return PrevTTI->shouldBuildLookupTables(); +} + +TargetTransformInfo::PopcntSupportKind +TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { + return PrevTTI->getPopcntSupport(IntTyWidthInBit); +} + +unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Imm, Ty); +} + +unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { + return PrevTTI->getNumberOfRegisters(Vector); +} + +unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { + return PrevTTI->getRegisterBitWidth(Vector); +} + +unsigned TargetTransformInfo::getMaximumUnrollFactor() const { + return PrevTTI->getMaximumUnrollFactor(); +} + +unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, + Type *Ty, + OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); +} + +unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) 
const { + return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp); +} + +unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return PrevTTI->getCastInstrCost(Opcode, Dst, Src); +} + +unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + return PrevTTI->getCFInstrCost(Opcode); +} + +unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return PrevTTI->getVectorInstrCost(Opcode, Val, Index); +} + +unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + ; +} + +unsigned +TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef<Type *> Tys) const { + return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys); +} + +unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { + return PrevTTI->getNumberOfParts(Tp); +} + +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { + return PrevTTI->getAddressComputationCost(Tp); +} + +namespace { + +struct NoTTI : ImmutablePass, TargetTransformInfo { + const DataLayout *DL; + + NoTTI() : ImmutablePass(ID), DL(0) { + initializeNoTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + // Note that this subclass is special, and must *not* call initializeTTI as + // it does not chain. + TopTTI = this; + PrevTTI = 0; + DL = getAnalysisIfAvailable<DataLayout>(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // Note that this subclass is special, and must *not* call + // TTI::getAnalysisUsage as it breaks the recursion. + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { + switch (Opcode) { + default: + // By default, just classify everything as 'basic'. + return TCC_Basic; + + case Instruction::GetElementPtr: + llvm_unreachable("Use getGEPCost for GEP operations!"); + + case Instruction::BitCast: + assert(OpTy && "Cast instructions must provide the operand type"); + if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) + // Identity and pointer-to-pointer casts are free. + return TCC_Free; + + // Otherwise, the default basic cost is used. + return TCC_Basic; + + case Instruction::IntToPtr: + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && + OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::PtrToInt: + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. 
+ return TCC_Basic; + + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) + return TCC_Free; + + return TCC_Basic; + } + } + + unsigned getGEPCost(const Value *Ptr, + ArrayRef<const Value *> Operands) const { + // In the basic model, we just assume that all-constant GEPs will be folded + // into their uses via addressing modes. + for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) + if (!isa<Constant>(Operands[Idx])) + return TCC_Basic; + + return TCC_Free; + } + + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TCC_Basic * (NumArgs + 1); + } + + unsigned getCallCost(const Function *F, int NumArgs = -1) const { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); + return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!TopTTI->isLoweredToCall(F)) + return TCC_Basic; // Give a basic cost if it will be lowered directly. + + return TopTTI->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, + ArrayRef<const Value *> Arguments) const { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return TopTTI->getCallCost(F, Arguments.size()); + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<Type *> ParamTys) const { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. + return TCC_Basic; + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + return TCC_Free; + } + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef<const Value *> Arguments) const { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. 
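getGEPCost in the fallback implementation treats a GEP as free only when every operand is a constant, on the assumption that such address arithmetic folds into its users' addressing modes; a single non-constant index makes it a basic-cost instruction. A standalone sketch of that check, where Operand is a stand-in for the real Value hierarchy:

#include <vector>

enum Cost { TCC_Free = 0, TCC_Basic = 1 };

struct Operand {
  bool IsConstant;
};

// A GEP whose indices are all constants is assumed to fold into the
// addressing modes of its users; anything else costs a basic instruction.
Cost getGEPCost(const std::vector<Operand> &Indices) {
  for (const Operand &Op : Indices)
    if (!Op.IsConstant)
      return TCC_Basic;
  return TCC_Free;
}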
+ SmallVector<Type *, 8> ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); + } + + unsigned getUserCost(const User *U) const { + if (isa<PHINode>(U)) + return TCC_Free; // Model all PHI nodes as free. + + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) + // In the basic model we just assume that all-constant GEPs will be + // folded into their uses via addressing modes. + return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic; + + if (ImmutableCallSite CS = U) { + const Function *F = CS.getCalledFunction(); + if (!F) { + // Just use the called value type. + Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); + return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); + } + + SmallVector<const Value *, 8> Arguments; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) + Arguments.push_back(*AI); + + return TopTTI->getCallCost(F, Arguments); + } + + if (const CastInst *CI = dyn_cast<CastInst>(U)) { + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + return TCC_Free; + } + + // Otherwise delegate to the fully generic implementations. + return getOperationCost(Operator::getOpcode(U), U->getType(), + U->getNumOperands() == 1 ? + U->getOperand(0)->getType() : 0); + } + + bool isLoweredToCall(const Function *F) const { + // FIXME: These should almost certainly not be handled here, and instead + // handled with the help of TLI or the target itself. This was largely + // ported from existing analysis heuristics here so that such refactorings + // can take place in the future. + + if (F->isIntrinsic()) + return false; + + if (F->hasLocalLinkage() || !F->hasName()) + return true; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == + "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || + Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") + return false; + + return true; + } + + bool isLegalAddImmediate(int64_t Imm) const { + return false; + } + + bool isLegalICmpImmediate(int64_t Imm) const { + return false; + } + + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that reg+reg addressing is allowed. This heuristic is taken from + // the implementation of LSR. 
+ return !BaseGV && BaseOffset == 0 && Scale <= 1; + } + + bool isTruncateFree(Type *Ty1, Type *Ty2) const { + return false; + } + + bool isTypeLegal(Type *Ty) const { + return false; + } + + unsigned getJumpBufAlignment() const { + return 0; + } + + unsigned getJumpBufSize() const { + return 0; + } + + bool shouldBuildLookupTables() const { + return true; + } + + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { + return PSK_Software; + } + + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { + return 1; + } + + unsigned getNumberOfRegisters(bool Vector) const { + return 8; + } + + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + + unsigned getMaximumUnrollFactor() const { + return 1; + } + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind) const { + return 1; + } + + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index = 0, Type *SubTp = 0) const { + return 1; + } + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return 1; + } + + unsigned getCFInstrCost(unsigned Opcode) const { + return 1; + } + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const { + return 1; + } + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const { + return 1; + } + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return 1; + } + + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef<Type*> Tys) const { + return 1; + } + + unsigned getNumberOfParts(Type *Tp) const { + return 0; + } + + unsigned getAddressComputationCost(Type *Tp) const { + return 0; + } +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti", + "No target information", true, true, true) +char NoTTI::ID = 0; + +ImmutablePass *llvm::createNoTargetTransformInfoPass() { + return new NoTTI(); +} diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp index 22da857..4c68322 100644 --- a/contrib/llvm/lib/Analysis/Trace.cpp +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -16,8 +16,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Trace.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 0faf1398..68e43b2 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -57,12 +57,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Constants.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Metadata.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 3beb373..45dcc5e 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp 
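The cost interface implemented by the NoTTI pass above is consumed by summing per-instruction estimates: PHIs and no-op casts come back as TCC_Free, and most other IR constructs as TCC_Basic. A minimal sketch of such a client, assuming the LLVM 3.3-era llvm/Analysis/TargetTransformInfo.h interface shown in this commit (the helper name is illustrative, not part of the patch):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"

// Sketch: estimate the size cost of a basic block with the generic
// TCC_Free/TCC_Basic user-cost model (PHIs and identity casts are free).
static unsigned estimateBlockCost(const llvm::BasicBlock &BB,
                                  const llvm::TargetTransformInfo &TTI) {
  unsigned Cost = 0;
  for (llvm::BasicBlock::const_iterator I = BB.begin(), E = BB.end();
       I != E; ++I)
    Cost += TTI.getUserCost(&*I);
  return Cost;
}

Unrolling and inlining heuristics use this style of walk to decide whether duplicating a block is worth the code growth.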
@@ -13,21 +13,21 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Operator.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/ADT/SmallPtrSet.h" #include <cstring> using namespace llvm; using namespace llvm::PatternMatch; @@ -58,7 +58,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - + // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. @@ -84,7 +84,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -266,11 +266,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); KnownZero &= ~Elt; - KnownOne &= Elt; + KnownOne &= Elt; } return; } - + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); @@ -306,7 +306,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } return; } - + if (Argument *A = dyn_cast<Argument>(V)) { unsigned Align = 0; @@ -345,9 +345,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If either the LHS or the RHS are Zero, the result is zero. ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
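The known-bits rule in the hunk just above (a result bit is known one only where both And operands are known one, and known zero where either operand is known zero) is easy to sanity-check on concrete masks. A small standalone illustration, not tied to the LLVM APIs:

#include <cassert>
#include <cstdint>

int main() {
  // LHS is known to match 1?10 (bit 2 unknown): known-zero/known-one masks.
  uint8_t LZ = 0x1, LO = 0xA;
  // RHS is known to match 011? (bit 0 unknown).
  uint8_t RZ = 0x8, RO = 0x6;
  // And: ones survive only where both sides are known one; zeros propagate
  // from either side; everything else stays unknown.
  uint8_t KnownOne = LO & RO;   // 0x2 -> bit 1 is provably one
  uint8_t KnownZero = LZ | RZ;  // 0x9 -> bits 0 and 3 are provably zero
  assert(KnownOne == 0x2 && KnownZero == 0x9);
  assert((KnownOne & KnownZero) == 0 && "Bits known to be one AND zero?");
  return 0;
}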
@@ -357,9 +357,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Or: { ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. @@ -369,9 +369,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Xor: { ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. @@ -407,8 +407,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; @@ -433,7 +433,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + if(TD) { + SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + } else { + SrcBitWidth = SrcTy->getScalarSizeInBits(); + if (!SrcBitWidth) return; + } assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); @@ -460,11 +465,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::SExt: { // Compute the bits in the result that are not present in the input. 
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - + KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -481,7 +486,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 @@ -493,10 +498,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - + // Unsigned shift right. ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. @@ -509,13 +514,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - + // Signed shift right. ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. 
KnownZero |= HighBits; @@ -559,7 +564,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } @@ -606,7 +611,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Align = AI->getAlignment(); if (Align == 0 && TD) Align = TD->getABITypeAlignment(AI->getType()->getElementType()); - + if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); break; @@ -643,7 +648,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, LocalKnownZero.countTrailingOnes())); } } - + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); break; } @@ -799,12 +804,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, KnownZero = ZeroBits[BitWidth - 1]; } -/// isPowerOfTwo - Return true if the given value is known to have exactly one +/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, - unsigned Depth) { +bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (Constant *C = dyn_cast<Constant>(V)) { if (C->isNullValue()) return OrZero; @@ -831,19 +835,19 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) - return isPowerOfTwo(X, TD, /*OrZero*/true, Depth); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth); if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) - return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth); if (SelectInst *SI = dyn_cast<SelectInst>(V)) - return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && - isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. - if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || - isPowerOfTwo(Y, TD, /*OrZero*/true, Depth)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -856,7 +860,73 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, // copying a sign bit (sdiv int_min, 2). if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { - return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth); + } + + return false; +} + +/// \brief Test whether a GEP's result is known to be non-null. +/// +/// Uses properties inherent in a GEP to try to determine whether it is known +/// to be non-null. +/// +/// Currently this routine does not support vector GEPs. 
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, + unsigned Depth) { + if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) + return false; + + // FIXME: Support vector-GEPs. + assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); + + // If the base pointer is non-null, we cannot walk to a null address with an + // inbounds GEP in address space zero. + if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth)) + return true; + + // Past this, if we don't have DataLayout, we can't do much. + if (!DL) + return false; + + // Walk the GEP operands and see if any operand introduces a non-zero offset. + // If so, then the GEP cannot produce a null pointer, as doing so would + // inherently violate the inbounds contract within address space zero. + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + // Struct types are easy -- they must always be indexed by a constant. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand()); + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = DL->getStructLayout(STy); + uint64_t ElementOffset = SL->getElementOffset(ElementIdx); + if (ElementOffset > 0) + return true; + continue; + } + + // If we have a zero-sized type, the index doesn't matter. Keep looping. + if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0) + continue; + + // Fast path the constant operand case both for efficiency and so we don't + // increment Depth when just zipping down an all-constant GEP. + if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) { + if (!OpC->isZero()) + return true; + continue; + } + + // We post-increment Depth here because while isKnownNonZero increments it + // as well, when we pop back up that increment won't persist. We don't want + // to recurse 10k times just because we have 10k GEP operands. We don't + // bail completely out because we want to handle constant GEPs regardless + // of depth. + if (Depth++ >= MaxDepth) + continue; + + if (isKnownNonZero(GTI.getOperand(), DL, Depth)) + return true; } return false; @@ -881,7 +951,16 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (Depth++ >= MaxDepth) return false; - unsigned BitWidth = getBitWidth(V->getType(), TD); + // Check for pointer simplifications. + if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + if (isGEPKnownNonNull(GEP, TD, Depth)) + return true; + } + + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); // X | Y != 0 if X != 0 or Y != 0. Value *X = 0, *Y = 0; @@ -955,9 +1034,9 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth)) + if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth)) return true; - if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) + if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth)) return true; } // X * Y. 
@@ -996,7 +1075,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1026,14 +1105,14 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (Depth == 6) return 1; // Limit search depth. - + Operator *U = dyn_cast<Operator>(V); switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; - + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); // ashr X, C -> adds C sign bits. Vectors too. @@ -1075,38 +1154,38 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (Tmp == 1) return 1; // Early out. Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); return std::min(Tmp, Tmp2); - + case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); if (Tmp == 1) return 1; // Early out. - + // Special case decrementing a value (ADD X, -1): if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - + // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; - + // If we are subtracting one from a positive number, there is no carry // out of the result. if (KnownZero.isNegative()) return Tmp; } - + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; - + case Instruction::Sub: Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); if (Tmp2 == 1) return 1; - + // Handle NEG. if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) if (CLHS->isNullValue()) { @@ -1116,26 +1195,26 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; - + // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. if (KnownZero.isNegative()) return Tmp2; - + // Otherwise, we treat this like a SUB. } - + // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; - + case Instruction::PHI: { PHINode *PN = cast<PHINode>(U); // Don't analyze large in-degree PHIs. if (PN->getNumIncomingValues() > 4) break; - + // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1); @@ -1152,13 +1231,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // case for targets like X86. break; } - + // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. 
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - + if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -1167,7 +1246,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // Nothing known. return FirstAnswer; } - + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine // the number of identical bits in the top of the input value. Mask = ~Mask; @@ -1195,7 +1274,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (Base == 0) return false; - + if (Base == 1) { Multiple = V; return true; @@ -1211,11 +1290,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (CI && CI->getZExtValue() % Base == 0) { Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); - return true; + return true; } - + if (Depth == MaxDepth) return false; // Limit search depth. - + Operator *I = dyn_cast<Operator>(V); if (!I) return false; @@ -1247,13 +1326,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast<Constant>(Op1)) if (Constant *MulC = dyn_cast<Constant>(Mul0)) { - if (Op1C->getType()->getPrimitiveSizeInBits() < + if (Op1C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); - if (Op1C->getType()->getPrimitiveSizeInBits() > + if (Op1C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); - + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) Multiple = ConstantExpr::getMul(MulC, Op1C); return true; @@ -1271,13 +1350,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast<Constant>(Op0)) if (Constant *MulC = dyn_cast<Constant>(Mul1)) { - if (Op0C->getType()->getPrimitiveSizeInBits() < + if (Op0C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); - if (Op0C->getType()->getPrimitiveSizeInBits() > + if (Op0C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); - + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) Multiple = ConstantExpr::getMul(MulC, Op0C); return true; @@ -1297,7 +1376,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return false; } -/// CannotBeNegativeZero - Return true if we can prove that the specified FP +/// CannotBeNegativeZero - Return true if we can prove that the specified FP /// value is never equal to -0.0. /// /// NOTE: this function will need to be revisited when we support non-default @@ -1306,28 +1385,33 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) return !CFP->getValueAPF().isNegZero(); - + if (Depth == 6) return 1; // Limit search depth. const Operator *I = dyn_cast<Operator>(V); if (I == 0) return false; - + + // Check if the nsz fast-math flag is set + if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I)) + if (FPO->hasNoSignedZeros()) + return true; + // (add x, 0.0) is guaranteed to return +0.0, not -0.0. 
- if (I->getOpcode() == Instruction::FAdd && - isa<ConstantFP>(I->getOperand(1)) && - cast<ConstantFP>(I->getOperand(1))->isNullValue()) - return true; - + if (I->getOpcode() == Instruction::FAdd) + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1))) + if (CFP->isNullValue()) + return true; + // sitofp and uitofp turn into +0.0 for zero. if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) return true; - + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) // sqrt(-0.0) = -0.0, no other negative results are possible. if (II->getIntrinsicID() == Intrinsic::sqrt) return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); - + if (const CallInst *CI = dyn_cast<CallInst>(I)) if (const Function *F = CI->getCalledFunction()) { if (F->isDeclaration()) { @@ -1342,7 +1426,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); } } - + return false; } @@ -1359,9 +1443,9 @@ Value *llvm::isBytewiseValue(Value *V) { if (Constant *C = dyn_cast<Constant>(V)) if (C->isNullValue()) return Constant::getNullValue(Type::getInt8Ty(V->getContext())); - + // Constant float and double values can be handled as integer values if the - // corresponding integer value is "byteable". An important case is 0.0. + // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { if (CFP->getType()->isFloatTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); @@ -1369,8 +1453,8 @@ Value *llvm::isBytewiseValue(Value *V) { V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); // Don't handle long double formats, which have strange constraints. } - - // We can handle constant integers that are power of two in size and a + + // We can handle constant integers that are power of two in size and a // multiple of 8 bits. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { unsigned Width = CI->getBitWidth(); @@ -1384,7 +1468,7 @@ Value *llvm::isBytewiseValue(Value *V) { Val2 = Val.lshr(NextWidth); Val2 = Val2.trunc(Val.getBitWidth()/2); Val = Val.trunc(Val.getBitWidth()/2); - + // If the top/bottom halves aren't the same, reject it. if (Val != Val2) return 0; @@ -1392,7 +1476,7 @@ Value *llvm::isBytewiseValue(Value *V) { return ConstantInt::get(V->getContext(), Val); } } - + // A ConstantDataArray/Vector is splatable if all its members are equal and // also splatable. if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) { @@ -1400,11 +1484,11 @@ Value *llvm::isBytewiseValue(Value *V) { Value *Val = isBytewiseValue(Elt); if (!Val) return 0; - + for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) return 0; - + return Val; } @@ -1428,7 +1512,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVector<unsigned, 10> &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType); + llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -1459,7 +1543,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, // the struct's elements had a value that was inserted directly. In the latter // case, perhaps we can't determine each of the subelements individually, but // we might be able to find the complete struct somewhere. 
- + // Find the value that is at that particular spot Value *V = FindInsertedValue(From, Idxs); @@ -1518,7 +1602,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, if (C == 0) return 0; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } - + if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices @@ -1543,7 +1627,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), InsertBefore); } - + // This insert value inserts something else than what we are looking for. // See if the (aggregrate) value inserted into has the value we are // looking for, then. @@ -1558,26 +1642,26 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, makeArrayRef(req_idx, idx_range.end()), InsertBefore); } - + if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. - - // Calculate the number of indices required + + // Calculate the number of indices required unsigned size = I->getNumIndices() + idx_range.size(); // Allocate some space to put the new indices in SmallVector<unsigned, 5> Idxs; Idxs.reserve(size); // Add indices from the extract value instruction Idxs.append(I->idx_begin(), I->idx_end()); - + // Add requested indices Idxs.append(idx_range.begin(), idx_range.end()); - assert(Idxs.size() == size + assert(Idxs.size() == size && "Number of indices added not correct?"); - + return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); } // Otherwise, we don't know (such as, extracting from a function return value @@ -1589,41 +1673,33 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &TD) { - Operator *PtrOp = dyn_cast<Operator>(Ptr); - if (PtrOp == 0 || Ptr->getType()->isVectorTy()) - return Ptr; - - // Just look through bitcasts. - if (PtrOp->getOpcode() == Instruction::BitCast) - return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); - - // If this is a GEP with constant indices, we can look through it. - GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); - if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = cast<ConstantInt>(*I); - if (OpC->isZero()) continue; - - // Handle a struct and array indices which add their offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + const DataLayout *TD) { + // Without DataLayout, conservatively assume 64-bit offsets, which is + // the widest we support. + unsigned BitWidth = TD ? 
TD->getPointerSizeInBits() : 64; + APInt ByteOffset(BitWidth, 0); + while (1) { + if (Ptr->getType()->isVectorTy()) + break; + + if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { + APInt GEPOffset(BitWidth, 0); + if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) + break; + ByteOffset += GEPOffset; + Ptr = GEP->getPointerOperand(); + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + Ptr = cast<Operator>(Ptr)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) { + if (GA->mayBeOverridden()) + break; + Ptr = GA->getAliasee(); } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += OpC->getSExtValue()*Size; + break; } } - - // Re-sign extend from the pointer size if needed to get overflow edge cases - // right. - unsigned PtrSize = TD.getPointerSizeInBits(); - if (PtrSize < 64) - Offset = SignExtend64(Offset, PtrSize); - - return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); + Offset = ByteOffset.getSExtValue(); + return Ptr; } @@ -1636,26 +1712,26 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Look through bitcast instructions and geps. V = V->stripPointerCasts(); - + // If the value is a GEP instructionor constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; - + // Make sure the index-ee is a pointer to array of i8. PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; - + // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); if (FirstIdx == 0 || !FirstIdx->isZero()) return false; - + // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. @@ -1681,13 +1757,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, Str = ""; return true; } - + // Must be a Constant Array const ConstantDataArray *Array = dyn_cast<ConstantDataArray>(GV->getInitializer()); if (Array == 0 || !Array->isString()) return false; - + // Get the number of elements in the array uint64_t NumElts = Array->getType()->getArrayNumElements(); @@ -1696,10 +1772,10 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (Offset > NumElts) return false; - + // Skip over 'offset' bytes. Str = Str.substr(Offset); - + if (TrimAtNul) { // Trim off the \0 and anything after it. If the array is not nul // terminated, we just return the whole end of string. The client may know @@ -1753,7 +1829,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { if (Len1 != Len2) return 0; return Len1; } - + // Otherwise, see if we can read the string. StringRef StrData; if (!getConstantStringInfo(V, StrData)) @@ -1940,3 +2016,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; // Misc instructions which have effects } } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. 
+ if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + return false; +}
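With isKnownNonNull moved here and isGEPKnownNonNull feeding isKnownNonZero, null tests against allocas, byval arguments, non-weak globals, and inbounds GEPs of such bases can often be decided even without DataLayout. A hedged sketch of a caller; the function name is illustrative, and only isKnownNonZero and the IR classes come from the tree:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

// Sketch: an "icmp eq %p, null" can be folded to false when the pointer
// operand is provably non-null by the reasoning added in this patch.
static bool nullCompareIsFalse(llvm::ICmpInst *ICmp,
                               const llvm::DataLayout *DL) {
  if (ICmp->getPredicate() != llvm::CmpInst::ICMP_EQ)
    return false;
  if (!llvm::isa<llvm::ConstantPointerNull>(ICmp->getOperand(1)))
    return false;
  llvm::Value *Ptr = ICmp->getOperand(0);
  // isKnownNonZero consults isKnownNonNull and isGEPKnownNonNull for pointers.
  return llvm::isKnownNonZero(Ptr, DL);
}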