Diffstat (limited to 'contrib/llvm/lib/Analysis')
58 files changed, 28065 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp new file mode 100644 index 0000000..1f2528f --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -0,0 +1,344 @@ +//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic AliasAnalysis interface, which is the +// common interface used by all clients and implementations of alias analysis. +// +// This file also implements the default version of the AliasAnalysis interface +// that is to be used when no other implementation is specified. This does some +// simple tests that detect obvious cases: two different global pointers cannot +// alias, a global cannot alias a malloc, two different mallocs cannot alias, +// etc. +// +// This alias analysis implementation really isn't very good for anything, but +// it is very fast, and makes a nice clean default implementation. Because it +// handles lots of little corner cases, other, more complex, alias analysis +// implementations may choose to rely on this pass to resolve these simple and +// easy cases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +// Register the AliasAnalysis interface, providing a nice name to refer to. +static RegisterAnalysisGroup<AliasAnalysis> Z("Alias Analysis"); +char AliasAnalysis::ID = 0; + +//===----------------------------------------------------------------------===// +// Default chaining methods +//===----------------------------------------------------------------------===// + +AliasAnalysis::AliasResult +AliasAnalysis::alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->alias(V1, V1Size, V2, V2Size); +} + +bool AliasAnalysis::pointsToConstantMemory(const Value *P) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->pointsToConstantMemory(P); +} + +void AliasAnalysis::deleteValue(Value *V) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->deleteValue(V); +} + +void AliasAnalysis::copyValue(Value *From, Value *To) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->copyValue(From, To); +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here.
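+  // (Chaining protocol note: InitializeAliasAnalysis points the AA member at
+  // the next analysis in the chain; BasicAA sits at the bottom of the chain,
+  // where AA is null, which is why the "if (!AA)" checks below stop the
+  // forwarding.)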
+ + ModRefBehavior MRB = getModRefBehavior(CS); + if (MRB == DoesNotAccessMemory) + return NoModRef; + + ModRefResult Mask = ModRef; + if (MRB == OnlyReadsMemory) + Mask = Ref; + else if (MRB == AliasAnalysis::AccessesArguments) { + bool doesAlias = false; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (!isNoAlias(*AI, ~0U, P, Size)) { + doesAlias = true; + break; + } + + if (!doesAlias) + return NoModRef; + } + + // If P points to a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & Mod) && pointsToConstantMemory(P)) + Mask = ModRefResult(Mask & ~Mod); + + // If this is BasicAA, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask); +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here. + + // If CS1 or CS2 are readnone, they don't interact. + ModRefBehavior CS1B = getModRefBehavior(CS1); + if (CS1B == DoesNotAccessMemory) return NoModRef; + + ModRefBehavior CS2B = getModRefBehavior(CS2); + if (CS2B == DoesNotAccessMemory) return NoModRef; + + // If they both only read from memory, there is no dependence. + if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory) + return NoModRef; + + AliasAnalysis::ModRefResult Mask = ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. + if (CS1B == OnlyReadsMemory) + Mask = ModRefResult(Mask & Ref); + + // If CS2 only accesses memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. + if (CS2B == AccessesArguments) { + AliasAnalysis::ModRefResult R = NoModRef; + for (ImmutableCallSite::arg_iterator + I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask); + if (R == Mask) + break; + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (CS1B == AccessesArguments) { + AliasAnalysis::ModRefResult R = NoModRef; + for (ImmutableCallSite::arg_iterator + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) + if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) { + R = Mask; + break; + } + if (R == NoModRef) + return R; + } + + // If this is BasicAA, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + // Don't assert AA because BasicAA calls us in order to make use of the + // logic here. + + ModRefBehavior Min = UnknownModRefBehavior; + + // Call back into the alias analysis with the other form of getModRefBehavior + // to see if it can give a better response. + if (const Function *F = CS.getCalledFunction()) + Min = getModRefBehavior(F); + + // If this is BasicAA, don't forward. + if (!AA) return Min; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any result we've managed to compute.
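+  // (std::min is correct here because the ModRefBehavior enumerators are
+  // ordered from most precise, DoesNotAccessMemory, to least precise,
+  // UnknownModRefBehavior, so the smaller value is the stronger answer.)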
+ return std::min(AA->getModRefBehavior(CS), Min); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(const Function *F) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->getModRefBehavior(F); +} + + +//===----------------------------------------------------------------------===// +// AliasAnalysis non-virtual helper method implementation +//===----------------------------------------------------------------------===// + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) { + // Be conservative in the face of volatile. + if (L->isVolatile()) + return ModRef; + + // If the load address doesn't alias the given address, it doesn't read + // or write the specified memory. + if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size)) + return NoModRef; + + // Otherwise, a load just reads. + return Ref; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) { + // Be conservative in the face of volatile. + if (S->isVolatile()) + return ModRef; + + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(S->getOperand(1), + getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this store. + if (pointsToConstantMemory(P)) + return NoModRef; + + // Otherwise, a store just writes. + return Mod; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) { + // If the va_arg address cannot alias the pointer in question, then the + // specified memory cannot be accessed by the va_arg. + if (!alias(V->getOperand(0), UnknownSize, P, Size)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this va_arg. + if (pointsToConstantMemory(P)) + return NoModRef; + + // Otherwise, a va_arg reads and writes. + return ModRef; +} + + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) { +#define GET_INTRINSIC_MODREF_BEHAVIOR +#include "llvm/Intrinsics.gen" +#undef GET_INTRINSIC_MODREF_BEHAVIOR +} + +// AliasAnalysis destructor: DO NOT move this to the header file for +// AliasAnalysis or else clients of the AliasAnalysis class may not depend on +// the AliasAnalysis.o file in the current .a file, causing alias analysis +// support to not be included in the tool correctly! +// +AliasAnalysis::~AliasAnalysis() {} + +/// InitializeAliasAnalysis - Subclasses must call this method to initialize the +/// AliasAnalysis interface before any other methods are called. +/// +void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { + TD = P->getAnalysisIfAvailable<TargetData>(); + AA = &P->getAnalysis<AliasAnalysis>(); +} + +// getAnalysisUsage - All alias analysis implementations should invoke this +// directly (using AliasAnalysis::getAnalysisUsage(AU)). +void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); // All AA's chain +} + +/// getTypeStoreSize - Return the TargetData store size for the given type, +/// if known, or a conservative value otherwise. +/// +unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) { + return TD ? 
TD->getTypeStoreSize(Ty) : ~0u; +} + +/// canBasicBlockModify - Return true if it is possible for execution of the +/// specified basic block to modify the value pointed to by Ptr. +/// +bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, + const Value *Ptr, unsigned Size) { + return canInstructionRangeModify(BB.front(), BB.back(), Ptr, Size); +} + +/// canInstructionRangeModify - Return true if it is possible for the execution +/// of the specified instructions to modify the value pointed to by Ptr. The +/// instructions to consider are all of the instructions in the range of [I1,I2] +/// INCLUSIVE. I1 and I2 must be in the same basic block. +/// +bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, + const Instruction &I2, + const Value *Ptr, unsigned Size) { + assert(I1.getParent() == I2.getParent() && + "Instructions not in same basic block!"); + BasicBlock::const_iterator I = &I1; + BasicBlock::const_iterator E = &I2; + ++E; // Convert from inclusive to exclusive range. + + for (; I != E; ++I) // Check every instruction in range + if (getModRefInfo(I, Ptr, Size) & Mod) + return true; + return false; +} + +/// isNoAliasCall - Return true if this pointer is returned by a noalias +/// function. +bool llvm::isNoAliasCall(const Value *V) { + if (isa<CallInst>(V) || isa<InvokeInst>(V)) + return ImmutableCallSite(cast<Instruction>(V)) + .paramHasAttr(0, Attribute::NoAlias); + return false; +} + +/// isIdentifiedObject - Return true if this pointer refers to a distinct and +/// identifiable object. This returns true for: +/// Global Variables and Functions (but not Global Aliases) +/// Allocas and Mallocs +/// ByVal and NoAlias Arguments +/// NoAlias returns +/// +bool llvm::isIdentifiedObject(const Value *V) { + if (isa<AllocaInst>(V)) + return true; + if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V)) + return true; + if (isNoAliasCall(V)) + return true; + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasNoAliasAttr() || A->hasByValAttr(); + return false; +} + +// Because of the way .a files work, we must force the BasicAA implementation to +// be pulled in if the AliasAnalysis classes are pulled in. Otherwise we run +// the risk of AliasAnalysis being used, but the default implementation not +// being linked into the tool that uses it. +DEFINING_FILE_FOR(AliasAnalysis) diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp new file mode 100644 index 0000000..b178041 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp @@ -0,0 +1,170 @@ +//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass which can be used to count how many alias queries +// are being made and how the alias analysis implementation being used responds. 
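+// A typical invocation (hypothetical command line) schedules the counter
+// between the analysis being measured and its clients, e.g.:
+//   opt -basicaa -count-aa -licm foo.bc -disable-output
+// which reports how many queries LICM made and how BasicAA answered them.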
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); +static cl::opt<bool> +PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); + +namespace { + class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { + unsigned No, May, Must; + unsigned NoMR, JustRef, JustMod, MR; + Module *M; + public: + static char ID; // Class identification, replacement for typeinfo + AliasAnalysisCounter() : ModulePass(ID) { + No = May = Must = 0; + NoMR = JustRef = JustMod = MR = 0; + } + + void printLine(const char *Desc, unsigned Val, unsigned Sum) { + errs() << " " << Val << " " << Desc << " responses (" + << Val*100/Sum << "%)\n"; + } + ~AliasAnalysisCounter() { + unsigned AASum = No+May+Must; + unsigned MRSum = NoMR+JustRef+JustMod+MR; + if (AASum + MRSum) { // Print a report if any counted queries occurred... + errs() << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted:\n" + << " " << AASum << " Total Alias Queries Performed\n"; + if (AASum) { + printLine("no alias", No, AASum); + printLine("may alias", May, AASum); + printLine("must alias", Must, AASum); + errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; + } + + errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + if (MRSum) { + printLine("no mod/ref", NoMR, MRSum); + printLine("ref", JustRef, MRSum); + printLine("mod", JustMod, MRSum); + printLine("mod/ref", MR, MRSum); + errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum + << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum + << "%/" << MR*100/MRSum <<"%\n\n"; + } + } + } + + bool runOnModule(Module &M) { + this->M = &M; + InitializeAliasAnalysis(this); + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + // FIXME: We could count these too... + bool pointsToConstantMemory(const Value *P) { + return getAnalysis<AliasAnalysis>().pointsToConstantMemory(P); + } + + // Forwarding functions: just delegate to a real AA implementation, counting + // the number of responses... 
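+    // (Each override below bumps the matching counter, optionally prints the
+    // query, and then delegates to the chained implementation obtained via
+    // getAnalysis<AliasAnalysis>().)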
+ AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + }; +} + +char AliasAnalysisCounter::ID = 0; +INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", + "Count Alias Analysis Query Responses", false, true, false); + +ModulePass *llvm::createAliasAnalysisCounterPass() { + return new AliasAnalysisCounter(); +} + +AliasAnalysis::AliasResult +AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + AliasResult R = getAnalysis<AliasAnalysis>().alias(V1, V1Size, V2, V2Size); + + const char *AliasString; + switch (R) { + default: llvm_unreachable("Unknown alias type!"); + case NoAlias: No++; AliasString = "No alias"; break; + case MayAlias: May++; AliasString = "May alias"; break; + case MustAlias: Must++; AliasString = "Must alias"; break; + } + + if (PrintAll || (PrintAllFailures && R == MayAlias)) { + errs() << AliasString << ":\t"; + errs() << "[" << V1Size << "B] "; + WriteAsOperand(errs(), V1, true, M); + errs() << ", "; + errs() << "[" << V2Size << "B] "; + WriteAsOperand(errs(), V2, true, M); + errs() << "\n"; + } + + return R; +} + +AliasAnalysis::ModRefResult +AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size); + + const char *MRString; + switch (R) { + default: llvm_unreachable("Unknown mod/ref type!"); + case NoModRef: NoMR++; MRString = "NoModRef"; break; + case Ref: JustRef++; MRString = "JustRef"; break; + case Mod: JustMod++; MRString = "JustMod"; break; + case ModRef: MR++; MRString = "ModRef"; break; + } + + if (PrintAll || (PrintAllFailures && R == ModRef)) { + errs() << MRString << ": Ptr: "; + errs() << "[" << Size << "B] "; + WriteAsOperand(errs(), P, true, M); + errs() << "\t<->" << *CS.getInstruction() << '\n'; + } + return R; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp new file mode 100644 index 0000000..ce363cb --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -0,0 +1,290 @@ +//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple N^2 alias analysis accuracy evaluator. +// Basically, for each function in the program, it simply queries to see how the +// alias analysis implementation answers alias queries between each pair of +// pointers in the function. +// +// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +// Spadini, and Wojciech Stryjewski. 
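+// With the flags below, a run such as (hypothetical command line):
+//   opt -basicaa -aa-eval -print-all-alias-modref-info foo.bc -disable-output
+// prints every pairwise alias and mod/ref result, and doFinalization then
+// prints summary percentages for the whole module.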
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +using namespace llvm; + +static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden); + +static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden); + +static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden); +static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden); +static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden); +static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden); + +namespace { + class AAEval : public FunctionPass { + unsigned NoAlias, MayAlias, MustAlias; + unsigned NoModRef, Mod, Ref, ModRef; + + public: + static char ID; // Pass identification, replacement for typeid + AAEval() : FunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + bool doInitialization(Module &M) { + NoAlias = MayAlias = MustAlias = 0; + NoModRef = Mod = Ref = ModRef = 0; + + if (PrintAll) { + PrintNoAlias = PrintMayAlias = PrintMustAlias = true; + PrintNoModRef = PrintMod = PrintRef = PrintModRef = true; + } + return false; + } + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; +} + +char AAEval::ID = 0; +INITIALIZE_PASS(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true); + +FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } + +static void PrintResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + std::string o1, o2; + { + raw_string_ostream os1(o1), os2(o2); + WriteAsOperand(os1, V1, true, M); + WriteAsOperand(os2, V2, true, M); + } + + if (o2 < o1) + std::swap(o1, o2); + errs() << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, + Module *M) { + if (P) { + errs() << " " << Msg << ": Ptr: "; + WriteAsOperand(errs(), Ptr, true, M); + errs() << "\t<->" << *I << '\n'; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, + Module *M) { + if (P) { + errs() << " " << Msg << ": " << *CSA.getInstruction() + << " <-> " << *CSB.getInstruction() << '\n'; + } +} + +static inline bool isInterestingPointer(Value *V) { + return V->getType()->isPointerTy() + && !isa<ConstantPointerNull>(V); +} + +bool AAEval::runOnFunction(Function &F) { + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + SetVector<Value *> Pointers; + SetVector<CallSite> CallSites; + + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + if (I->getType()->isPointerTy()) // Add all pointer arguments. + Pointers.insert(I); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->getType()->isPointerTy()) // Add all pointer instructions. 
+ Pointers.insert(&*I); + Instruction &Inst = *I; + if (CallSite CS = cast<Value>(&Inst)) { + Value *Callee = CS.getCalledValue(); + // Skip actual functions for direct function calls. + if (!isa<Function>(Callee) && isInterestingPointer(Callee)) + Pointers.insert(Callee); + // Consider formals. + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (isInterestingPointer(*AI)) + Pointers.insert(*AI); + CallSites.insert(CS); + } else { + // Consider all operands. + for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); + OI != OE; ++OI) + if (isInterestingPointer(*OI)) + Pointers.insert(*OI); + } + } + + if (PrintNoAlias || PrintMayAlias || PrintMustAlias || + PrintNoModRef || PrintMod || PrintRef || PrintModRef) + errs() << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; + + // iterate over the worklist, and run the full (n^2)/2 disambiguations + for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); + I1 != E; ++I1) { + unsigned I1Size = ~0u; + const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); + if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); + + for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + unsigned I2Size = ~0u; + const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); + if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); + + switch (AA.alias(*I1, I1Size, *I2, I2Size)) { + case AliasAnalysis::NoAlias: + PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); + ++MayAlias; break; + case AliasAnalysis::MustAlias: + PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); + ++MustAlias; break; + default: + errs() << "Unknown alias query result!\n"; + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls and values + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + Instruction *I = C->getInstruction(); + + for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); + V != Ve; ++V) { + unsigned Size = ~0u; + const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); + if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); + + switch (AA.getModRefInfo(*C, *V, Size)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); + ++ModRef; break; + default: + errs() << "Unknown alias query result!\n"; + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { + if (D == C) + continue; + switch (AA.getModRefInfo(*C, *D)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); + ++Mod; break; + case 
AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); + ++ModRef; break; + } + } + } + + return false; +} + +static void PrintPercent(unsigned Num, unsigned Sum) { + errs() << "(" << Num*100ULL/Sum << "." + << ((Num*1000ULL/Sum) % 10) << "%)\n"; +} + +bool AAEval::doFinalization(Module &M) { + unsigned AliasSum = NoAlias + MayAlias + MustAlias; + errs() << "===== Alias Analysis Evaluator Report =====\n"; + if (AliasSum == 0) { + errs() << " Alias Analysis Evaluator Summary: No pointers!\n"; + } else { + errs() << " " << AliasSum << " Total Alias Queries Performed\n"; + errs() << " " << NoAlias << " no alias responses "; + PrintPercent(NoAlias, AliasSum); + errs() << " " << MayAlias << " may alias responses "; + PrintPercent(MayAlias, AliasSum); + errs() << " " << MustAlias << " must alias responses "; + PrintPercent(MustAlias, AliasSum); + errs() << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; + } + + // Display the summary for mod/ref analysis + unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; + if (ModRefSum == 0) { + errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + } else { + errs() << " " << ModRefSum << " Total ModRef Queries Performed\n"; + errs() << " " << NoModRef << " no mod/ref responses "; + PrintPercent(NoModRef, ModRefSum); + errs() << " " << Mod << " mod responses "; + PrintPercent(Mod, ModRefSum); + errs() << " " << Ref << " ref responses "; + PrintPercent(Ref, ModRefSum); + errs() << " " << ModRef << " mod & ref responses "; + PrintPercent(ModRef, ModRefSum); + errs() << " Alias Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp new file mode 100644 index 0000000..b9fe646 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp @@ -0,0 +1,135 @@ +//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass checks alias analysis users to ensure that if they +// create a new value, they do not query AA without informing it of the value. +// It acts as a shim over any other AA pass you want. +// +// Yes keeping track of every value in the program is expensive, but this is +// a debugging pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include <set> +using namespace llvm; + +namespace { + + class AliasDebugger : public ModulePass, public AliasAnalysis { + + //What we do is simple. Keep track of every value the AA could + //know about, and verify that queries are one of those. 
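+    //(For example, a hypothetical transform that clones an instruction but
+    //never calls copyValue() on the AA would trip the asserts below on the
+    //first query against the clone.)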
+ //A query to a value that didn't exist when the AA was created + //means someone forgot to update the AA when creating new values + + std::set<const Value*> Vals; + + public: + static char ID; // Class identification, replacement for typeinfo + AliasDebugger() : ModulePass(ID) {} + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + + for(Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + Vals.insert(&*I); + for (User::const_op_iterator OI = I->op_begin(), + OE = I->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + + for(Module::iterator I = M.begin(), + E = M.end(); I != E; ++I){ + Vals.insert(&*I); + if(!I->isDeclaration()) { + for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) + Vals.insert(&*AI); + for (Function::const_iterator FI = I->begin(), FE = I->end(); + FI != FE; ++FI) + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + Vals.insert(&*BI); + for (User::const_op_iterator OI = BI->op_begin(), + OE = BI->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + } + + } + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + assert(Vals.find(V1) != Vals.end() && "Never seen value in AA before"); + assert(Vals.find(V2) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::getModRefInfo(CS, P, Size); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + + bool pointsToConstantMemory(const Value *P) { + assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::pointsToConstantMemory(P); + } + + virtual void deleteValue(Value *V) { + assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); + AliasAnalysis::deleteValue(V); + } + virtual void copyValue(Value *From, Value *To) { + Vals.insert(To); + AliasAnalysis::copyValue(From, To); + } + + }; +} + +char AliasDebugger::ID = 0; +INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", + "AA use debugger", false, true, false); + +Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } + diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp new file mode 100644 index 0000000..e74543b --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -0,0 +1,611 @@ +//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AliasSetTracker and AliasSet classes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// mergeSetIn - Merge the specified alias set into this alias set. +/// +void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { + assert(!AS.Forward && "Alias set is already forwarding!"); + assert(!Forward && "This set is a forwarding set!!"); + + // Update the alias and access types of this set... + AccessTy |= AS.AccessTy; + AliasTy |= AS.AliasTy; + Volatile |= AS.Volatile; + + if (AliasTy == MustAlias) { + // Check that these two merged sets really are must aliases. Since both + // used to be must-alias sets, we can just check any pointer from each set + // for aliasing. + AliasAnalysis &AA = AST.getAliasAnalysis(); + PointerRec *L = getSomePointer(); + PointerRec *R = AS.getSomePointer(); + + // If the pointers are not a must-alias pair, this set becomes a may alias. + if (AA.alias(L->getValue(), L->getSize(), R->getValue(), R->getSize()) + != AliasAnalysis::MustAlias) + AliasTy = MayAlias; + } + + if (CallSites.empty()) { // Merge call sites... + if (!AS.CallSites.empty()) + std::swap(CallSites, AS.CallSites); + } else if (!AS.CallSites.empty()) { + CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end()); + AS.CallSites.clear(); + } + + AS.Forward = this; // Forward across AS now... + addRef(); // AS is now pointing to us... + + // Merge the list of constituent pointers... + if (AS.PtrList) { + *PtrListEnd = AS.PtrList; + AS.PtrList->setPrevInList(PtrListEnd); + PtrListEnd = AS.PtrListEnd; + + AS.PtrList = 0; + AS.PtrListEnd = &AS.PtrList; + assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + } +} + +void AliasSetTracker::removeAliasSet(AliasSet *AS) { + if (AliasSet *Fwd = AS->Forward) { + Fwd->dropRef(*this); + AS->Forward = 0; + } + AliasSets.erase(AS); +} + +void AliasSet::removeFromTracker(AliasSetTracker &AST) { + assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!"); + AST.removeAliasSet(this); +} + +void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, + unsigned Size, bool KnownMustAlias) { + assert(!Entry.hasAliasSet() && "Entry already in set!"); + + // Check to see if we have to downgrade to _may_ alias. + if (isMustAlias() && !KnownMustAlias) + if (PointerRec *P = getSomePointer()) { + AliasAnalysis &AA = AST.getAliasAnalysis(); + AliasAnalysis::AliasResult Result = + AA.alias(P->getValue(), P->getSize(), Entry.getValue(), Size); + if (Result == AliasAnalysis::MayAlias) + AliasTy = MayAlias; + else // First entry of must alias must have maximum size! + P->updateSize(Size); + assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!"); + } + + Entry.setAliasSet(this); + Entry.updateSize(Size); + + // Add it to the end of the list... 
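+  // (PtrListEnd always points at the null next-field of the last record, so
+  // the append is O(1): store the new entry into that slot, then advance
+  // PtrListEnd to the tail slot that setPrevInList returns.)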
+ assert(*PtrListEnd == 0 && "End of list is not null?"); + *PtrListEnd = &Entry; + PtrListEnd = Entry.setPrevInList(PtrListEnd); + assert(*PtrListEnd == 0 && "End of list is not null?"); + addRef(); // Entry points to alias set. +} + +void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { + CallSites.push_back(CS.getInstruction()); + + AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); + if (Behavior == AliasAnalysis::DoesNotAccessMemory) + return; + else if (Behavior == AliasAnalysis::OnlyReadsMemory) { + AliasTy = MayAlias; + AccessTy |= Refs; + return; + } + + // FIXME: This should use mod/ref information to make this not suck so bad + AliasTy = MayAlias; + AccessTy = ModRef; +} + +/// aliasesPointer - Return true if the specified pointer "may" (or must) +/// alias one of the members in the set. +/// +bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size, + AliasAnalysis &AA) const { + if (AliasTy == MustAlias) { + assert(CallSites.empty() && "Illegal must alias set!"); + + // If this is a set of MustAliases, only check to see if the pointer aliases + // SOME value in the set. + PointerRec *SomePtr = getSomePointer(); + assert(SomePtr && "Empty must-alias set??"); + return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size); + } + + // If this is a may-alias set, we have to check all of the pointers in the set + // to be sure it doesn't alias the set... + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.alias(Ptr, Size, I.getPointer(), I.getSize())) + return true; + + // Check the call sites list and invoke list... + if (!CallSites.empty()) { + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) + if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef) + return true; + } + + return false; +} + +bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { + if (AA.doesNotAccessMemory(CS)) + return false; + + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef || + AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef) + return true; + } + + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != + AliasAnalysis::NoModRef) + return true; + + return false; +} + +void AliasSetTracker::clear() { + // Delete all the PointerRec entries. + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) + I->second->eraseFromList(); + + PointerMap.clear(); + + // The alias sets should all be clear now. + AliasSets.clear(); +} + + +/// findAliasSetForPointer - Given a pointer, find the one alias set to put the +/// instruction referring to the pointer into. If there are multiple alias sets +/// that may alias the pointer, merge them together and return the unified set. +/// +AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, + unsigned Size) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue; + + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + } + + return FoundSet; +} + +/// containsPointer - Return true if the specified location is represented by +/// this alias set, false otherwise. This does not modify the AST object or +/// alias sets. 
+bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) + return true; + return false; +} + + + +AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesCallSite(CS, AA)) + continue; + + if (FoundSet == 0) // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + else if (!I->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + return FoundSet; +} + + + + +/// getAliasSetForPointer - Return the alias set that the specified pointer +/// lives in. +AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size, + bool *New) { + AliasSet::PointerRec &Entry = getEntryFor(Pointer); + + // Check to see if the pointer is already known. + if (Entry.hasAliasSet()) { + Entry.updateSize(Size); + // Return the set! + return *Entry.getAliasSet(*this)->getForwardedTarget(*this); + } + + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) { + // Add it to the alias set it aliases. + AS->addPointer(*this, Entry, Size); + return *AS; + } + + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer. + AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size); + return AliasSets.back(); +} + +bool AliasSetTracker::add(Value *Ptr, unsigned Size) { + bool NewPtr; + addPointer(Ptr, Size, AliasSet::NoModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(LoadInst *LI) { + bool NewPtr; + AliasSet &AS = addPointer(LI->getOperand(0), + AA.getTypeStoreSize(LI->getType()), + AliasSet::Refs, NewPtr); + if (LI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(StoreInst *SI) { + bool NewPtr; + Value *Val = SI->getOperand(0); + AliasSet &AS = addPointer(SI->getOperand(1), + AA.getTypeStoreSize(Val->getType()), + AliasSet::Mods, NewPtr); + if (SI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(VAArgInst *VAAI) { + bool NewPtr; + addPointer(VAAI->getOperand(0), ~0, AliasSet::ModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(CallSite CS) { + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) + return true; // Ignore DbgInfo Intrinsics. + if (AA.doesNotAccessMemory(CS)) + return true; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (AS) { + AS->addCallSite(CS, AA); + return false; + } + AliasSets.push_back(new AliasSet()); + AS = &AliasSets.back(); + AS->addCallSite(CS, AA); + return true; +} + +bool AliasSetTracker::add(Instruction *I) { + // Dispatch to one of the other add methods. 
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return add(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return add(SI); + if (CallInst *CI = dyn_cast<CallInst>(I)) + return add(CI); + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + return add(II); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return add(VAAI); + return true; +} + +void AliasSetTracker::add(BasicBlock &BB) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) + add(I); +} + +void AliasSetTracker::add(const AliasSetTracker &AST) { + assert(&AA == &AST.AA && + "Merging AliasSetTracker objects with different Alias Analyses!"); + + // Loop over all of the alias sets in AST, adding the pointers contained + // therein into the current alias sets. This can cause alias sets to be + // merged together in the current AST. + for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) { + if (I->Forward) continue; // Ignore forwarding alias sets + + AliasSet &AS = const_cast<AliasSet&>(*I); + + // If there are any call sites in the alias set, add them to this AST. + for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) + add(AS.CallSites[i]); + + // Loop over all of the pointers in this alias set. + bool X; + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), + (AliasSet::AccessType)AS.AccessTy, X); + if (AS.isVolatile()) NewAS.setVolatile(); + } + } +} + +/// remove - Remove the specified (potentially non-empty) alias set from the +/// tracker. +void AliasSetTracker::remove(AliasSet &AS) { + // Drop all call sites. + AS.CallSites.clear(); + + // Clear the alias set. + unsigned NumRefs = 0; + while (!AS.empty()) { + AliasSet::PointerRec *P = AS.PtrList; + + Value *ValToRemove = P->getValue(); + + // Unlink and delete entry from the list of values. + P->eraseFromList(); + + // Remember how many references need to be dropped. + ++NumRefs; + + // Finally, remove the entry. + PointerMap.erase(ValToRemove); + } + + // Stop using the alias set, removing it. + AS.RefCount -= NumRefs; + if (AS.RefCount == 0) + AS.removeFromTracker(*this); +} + +bool AliasSetTracker::remove(Value *Ptr, unsigned Size) { + AliasSet *AS = findAliasSetForPointer(Ptr, Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(LoadInst *LI) { + unsigned Size = AA.getTypeStoreSize(LI->getType()); + AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(StoreInst *SI) { + unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(VAArgInst *VAAI) { + AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), ~0); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(CallSite CS) { + if (AA.doesNotAccessMemory(CS)) + return false; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(Instruction *I) { + // Dispatch to one of the other remove methods... 
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return remove(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return remove(SI); + if (CallInst *CI = dyn_cast<CallInst>(I)) + return remove(CI); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return remove(VAAI); + return true; +} + + +// deleteValue method - This method is used to remove a pointer value from the +// AliasSetTracker entirely. It should be used when an instruction is deleted +// from the program to update the AST. If you don't use this, you would have +// dangling pointers to deleted instructions. +// +void AliasSetTracker::deleteValue(Value *PtrVal) { + // Notify the alias analysis implementation that this value is gone. + AA.deleteValue(PtrVal); + + // If this is a call instruction, remove the callsite from the appropriate + // AliasSet (if present). + if (CallSite CS = PtrVal) { + if (!AA.doesNotAccessMemory(CS)) { + // Scan all the alias sets to see if this call site is contained. + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward) continue; + + I->removeCallSite(CS); + } + } + } + + // First, look up the PointerRec for this pointer. + PointerMapType::iterator I = PointerMap.find(PtrVal); + if (I == PointerMap.end()) return; // Noop + + // If we found one, remove the pointer from the alias set it is in. + AliasSet::PointerRec *PtrValEnt = I->second; + AliasSet *AS = PtrValEnt->getAliasSet(*this); + + // Unlink and delete from the list of values. + PtrValEnt->eraseFromList(); + + // Stop using the alias set. + AS->dropRef(*this); + + PointerMap.erase(I); +} + +// copyValue - This method should be used whenever a preexisting value in the +// program is copied or cloned, introducing a new value. Note that it is ok for +// clients that use this method to introduce the same value multiple times: if +// the tracker already knows about a value, it will ignore the request. +// +void AliasSetTracker::copyValue(Value *From, Value *To) { + // Notify the alias analysis implementation that this value is copied. + AA.copyValue(From, To); + + // First, look up the PointerRec for this pointer. + PointerMapType::iterator I = PointerMap.find(From); + if (I == PointerMap.end()) + return; // Noop + assert(I->second->hasAliasSet() && "Dead entry?"); + + AliasSet::PointerRec &Entry = getEntryFor(To); + if (Entry.hasAliasSet()) return; // Already in the tracker! + + // Add it to the alias set it aliases... + I = PointerMap.find(From); + AliasSet *AS = I->second->getAliasSet(*this); + AS->addPointer(*this, Entry, I->second->getSize(), true); +} + + + +//===----------------------------------------------------------------------===// +// AliasSet/AliasSetTracker Printing Support +//===----------------------------------------------------------------------===// + +void AliasSet::print(raw_ostream &OS) const { + OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; + OS << (AliasTy == MustAlias ? 
"must" : "may") << " alias, "; + switch (AccessTy) { + case NoModRef: OS << "No access "; break; + case Refs : OS << "Ref "; break; + case Mods : OS << "Mod "; break; + case ModRef : OS << "Mod/Ref "; break; + default: llvm_unreachable("Bad value for AccessTy!"); + } + if (isVolatile()) OS << "[volatile] "; + if (Forward) + OS << " forwarding to " << (void*)Forward; + + + if (!empty()) { + OS << "Pointers: "; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I != begin()) OS << ", "; + WriteAsOperand(OS << "(", I.getPointer()); + OS << ", " << I.getSize() << ")"; + } + } + if (!CallSites.empty()) { + OS << "\n " << CallSites.size() << " Call Sites: "; + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (i) OS << ", "; + WriteAsOperand(OS, CallSites[i]); + } + } + OS << "\n"; +} + +void AliasSetTracker::print(raw_ostream &OS) const { + OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " + << PointerMap.size() << " pointer values.\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) + I->print(OS); + OS << "\n"; +} + +void AliasSet::dump() const { print(dbgs()); } +void AliasSetTracker::dump() const { print(dbgs()); } + +//===----------------------------------------------------------------------===// +// ASTCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteValue(getValPtr()); + // this now dangles! +} + +AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTCallbackVH & +AliasSetTracker::ASTCallbackVH::operator=(Value *V) { + return *this = ASTCallbackVH(V, AST); +} + +//===----------------------------------------------------------------------===// +// AliasSetPrinter Pass +//===----------------------------------------------------------------------===// + +namespace { + class AliasSetPrinter : public FunctionPass { + AliasSetTracker *Tracker; + public: + static char ID; // Pass identification, replacement for typeid + AliasSetPrinter() : FunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + } + + virtual bool runOnFunction(Function &F) { + Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>()); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + Tracker->add(&*I); + Tracker->print(errs()); + delete Tracker; + return false; + } + }; +} + +char AliasSetPrinter::ID = 0; +INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true); diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp new file mode 100644 index 0000000..398dec7 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -0,0 +1,43 @@ +//===-- Analysis.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Analysis.h" +#include "llvm/Analysis/Verifier.h" +#include <cstring> + +using namespace llvm; + +LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, + char **OutMessages) { + std::string Messages; + + LLVMBool Result = verifyModule(*unwrap(M), + static_cast<VerifierFailureAction>(Action), + OutMessages? &Messages : 0); + + if (OutMessages) + *OutMessages = strdup(Messages.c_str()); + + return Result; +} + +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { + return verifyFunction(*unwrap<Function>(Fn), + static_cast<VerifierFailureAction>(Action)); +} + +void LLVMViewFunctionCFG(LLVMValueRef Fn) { + Function *F = unwrap<Function>(Fn); + F->viewCFG(); +} + +void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) { + Function *F = unwrap<Function>(Fn); + F->viewCFGOnly(); +} diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp new file mode 100644 index 0000000..113c72b --- /dev/null +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -0,0 +1,1083 @@ +//===- BasicAliasAnalysis.cpp - Local Alias Analysis Impl -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default implementation of the Alias Analysis interface +// that simply implements a few identities (two different globals cannot alias, +// etc), but otherwise does no analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include <algorithm> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Useful predicates +//===----------------------------------------------------------------------===// + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +static bool isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} + +/// isNonEscapingLocalObject - Return true if the pointer is to a function-local +/// object that never escapes from the function. +static bool isNonEscapingLocalObject(const Value *V) { + // If this is a local allocation, check to see if it escapes. 
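+  // (For example, an alloca whose address is only used by loads and stores in
+  // this function does not escape, while one passed to an unknown call or
+  // stored into a global does; PointerMayBeCaptured makes that determination.)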
+ if (isa<AllocaInst>(V) || isNoAliasCall(V)) + // Set StoreCaptures to True so that we can assume in our callers that the + // pointer is not the result of a load instruction. Currently + // PointerMayBeCaptured doesn't have any special analysis for the + // StoreCaptures=false case; if it did, our callers could be refined to be + // more precise. + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + + // If this is an argument that corresponds to a byval or noalias argument, + // then it has not escaped before entering the function. Check if it escapes + // inside the function. + if (const Argument *A = dyn_cast<Argument>(V)) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { + // Don't bother analyzing arguments already known not to escape. + if (A->hasNoCaptureAttr()) + return true; + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + } + return false; +} + +/// isEscapeSource - Return true if the pointer is one which would have +/// been considered an escape by isNonEscapingLocalObject. +static bool isEscapeSource(const Value *V) { + if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V)) + return true; + + // The load case works because isNonEscapingLocalObject considers all + // stores to be escapes (it passes true for the StoreCaptures argument + // to PointerMayBeCaptured). + if (isa<LoadInst>(V)) + return true; + + return false; +} + +/// isObjectSmallerThan - Return true if we can prove that the object specified +/// by V is smaller than Size. +static bool isObjectSmallerThan(const Value *V, unsigned Size, + const TargetData &TD) { + const Type *AccessTy; + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + AccessTy = GV->getType()->getElementType(); + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + if (!AI->isArrayAllocation()) + AccessTy = AI->getType()->getElementType(); + else + return false; + } else if (const CallInst* CI = extractMallocCall(V)) { + if (!isArrayMalloc(V, &TD)) + // The size is the argument to the malloc call. + if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) + return (C->getZExtValue() < Size); + return false; + } else if (const Argument *A = dyn_cast<Argument>(V)) { + if (A->hasByValAttr()) + AccessTy = cast<PointerType>(A->getType())->getElementType(); + else + return false; + } else { + return false; + } + + if (AccessTy->isSized()) + return TD.getTypeAllocSize(AccessTy) < Size; + return false; +} + +//===----------------------------------------------------------------------===// +// NoAA Pass +//===----------------------------------------------------------------------===// + +namespace { + /// NoAA - This class implements the -no-aa pass, which always returns "I + /// don't know" for alias queries. NoAA is unlike other alias analysis + /// implementations, in that it does not chain to a previous analysis. As + /// such it doesn't follow many of the rules that other alias analyses must. 
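+  /// (Selected with -no-aa, NoAA is mostly useful as a worst-case baseline:
+  /// comparing its -aa-eval numbers with a real implementation's shows how
+  /// much precision that implementation actually buys.)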
+ /// + struct NoAA : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + NoAA() : ImmutablePass(ID) {} + explicit NoAA(char &PID) : ImmutablePass(PID) { } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + + virtual void initializePass() { + TD = getAnalysisIfAvailable<TargetData>(); + } + + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + return MayAlias; + } + + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; + } + + virtual bool pointsToConstantMemory(const Value *P) { return false; } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + return ModRef; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return ModRef; + } + + virtual void deleteValue(Value *V) {} + virtual void copyValue(Value *From, Value *To) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + }; +} // End of anonymous namespace + +// Register this pass... +char NoAA::ID = 0; +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, false); + +ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } + +//===----------------------------------------------------------------------===// +// GetElementPtr Instruction Decomposition and Analysis +//===----------------------------------------------------------------------===// + +namespace { + enum ExtensionKind { + EK_NotExtended, + EK_SignExt, + EK_ZeroExt + }; + + struct VariableGEPIndex { + const Value *V; + ExtensionKind Extension; + int64_t Scale; + }; +} + + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. Return the scale and offset +/// values as APInts and return V as a Value*, and return whether we looked +/// through any sign or zero extends. The incoming Value is known to have +/// IntegerType and it may already be sign or zero extended. +/// +/// Note that this looks through extends, so the high bits may not be +/// represented in the result. +static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + ExtensionKind &Extension, + const TargetData &TD, unsigned Depth) { + assert(V->getType()->isIntegerTy() && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + break; + // FALL THROUGH. 
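+        // For example, if X is known to be even, (X | 1) computes the same
+        // value as (X + 1), so the Or can be analyzed as an Add.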
+      case Instruction::Add:
+        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+                                TD, Depth+1);
+        Offset += RHSC->getValue();
+        return V;
+      case Instruction::Mul:
+        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+                                TD, Depth+1);
+        Offset *= RHSC->getValue();
+        Scale *= RHSC->getValue();
+        return V;
+      case Instruction::Shl:
+        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+                                TD, Depth+1);
+        Offset <<= RHSC->getValue().getLimitedValue();
+        Scale <<= RHSC->getValue().getLimitedValue();
+        return V;
+      }
+    }
+  }
+
+  // Since GEP indices are sign extended anyway, we don't care about the high
+  // bits of a sign or zero extended value - just scales and offsets. The
+  // extensions have to be consistent though.
+  if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+      (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+    Value *CastOp = cast<CastInst>(V)->getOperand(0);
+    unsigned OldWidth = Scale.getBitWidth();
+    unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+    Scale.trunc(SmallWidth);
+    Offset.trunc(SmallWidth);
+    Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+                                        TD, Depth+1);
+    Scale.zext(OldWidth);
+    Offset.zext(OldWidth);
+
+    return Result;
+  }
+
+  Scale = 1;
+  Offset = 0;
+  return V;
+}
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that Value::getUnderlyingObject() can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+                       SmallVectorImpl<VariableGEPIndex> &VarIndices,
+                       const TargetData *TD) {
+  // Limit recursion depth to limit compile time in crazy cases.
+  unsigned MaxLookup = 6;
+
+  BaseOffs = 0;
+  do {
+    // See if this is a bitcast or GEP.
+    const Operator *Op = dyn_cast<Operator>(V);
+    if (Op == 0) {
+      // The only non-operator case we can handle is a GlobalAlias.
+      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+        if (!GA->mayBeOverridden()) {
+          V = GA->getAliasee();
+          continue;
+        }
+      }
+      return V;
+    }
+
+    if (Op->getOpcode() == Instruction::BitCast) {
+      V = Op->getOperand(0);
+      continue;
+    }
+
+    const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+    if (GEPOp == 0)
+      return V;
+
+    // Don't attempt to analyze GEPs over unsized objects.
+    if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+          ->getElementType()->isSized())
+      return V;
+
+    // If we are lacking TargetData information, we can't compute the offsets
+    // of elements computed by GEPs. However, we can handle bitcast equivalent
+    // GEPs.
+    if (TD == 0) {
+      if (!GEPOp->hasAllZeroIndices())
+        return V;
+      V = GEPOp->getOperand(0);
+      continue;
+    }
+
+    // Walk the indices of the GEP, accumulating them into BaseOffs/VarIndices.
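+    // For example (an illustrative sketch), for a GEP like
+    //   getelementptr [10 x i32]* %A, i64 0, i64 %i
+    // with 64-bit pointers this accumulates BaseOffs = 0 plus one variable
+    // index {V = %i, Extension = EK_NotExtended, Scale = 4}.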
+    gep_type_iterator GTI = gep_type_begin(GEPOp);
+    for (User::const_op_iterator I = GEPOp->op_begin()+1,
+         E = GEPOp->op_end(); I != E; ++I) {
+      Value *Index = *I;
+      // Compute the (potentially symbolic) offset in bytes for this index.
+      if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+        // For a struct, add the member offset.
+        unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+        if (FieldNo == 0) continue;
+
+        BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+        continue;
+      }
+
+      // For an array/pointer, add the element offset, explicitly scaled.
+      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+        if (CIdx->isZero()) continue;
+        BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+        continue;
+      }
+
+      uint64_t Scale = TD->getTypeAllocSize(*GTI);
+      ExtensionKind Extension = EK_NotExtended;
+
+      // If the integer type is smaller than the pointer size, it is implicitly
+      // sign extended to pointer size.
+      unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+      if (TD->getPointerSizeInBits() > Width)
+        Extension = EK_SignExt;
+
+      // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+      APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+                                  *TD, 0);
+
+      // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+      // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+      BaseOffs += IndexOffset.getZExtValue()*Scale;
+      Scale *= IndexScale.getZExtValue();
+
+      // If we already had an occurrence of this index variable, merge this
+      // scale into it. For example, we want to handle:
+      //   A[x][x] -> x*16 + x*4 -> x*20
+      // This also ensures that 'x' only appears in the index list once.
+      for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+        if (VarIndices[i].V == Index &&
+            VarIndices[i].Extension == Extension) {
+          Scale += VarIndices[i].Scale;
+          VarIndices.erase(VarIndices.begin()+i);
+          break;
+        }
+      }
+
+      // Make sure that we have a scale that makes sense for this target's
+      // pointer size.
+      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+        Scale <<= ShiftBits;
+        Scale >>= ShiftBits;
+      }
+
+      if (Scale) {
+        VariableGEPIndex Entry = {Index, Extension, Scale};
+        VarIndices.push_back(Entry);
+      }
+    }
+
+    // Analyze the base pointer next.
+    V = GEPOp->getOperand(0);
+  } while (--MaxLookup);
+
+  // If the chain of expressions is too deep, just return early.
+  return V;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+                               const SmallVectorImpl<VariableGEPIndex> &Src) {
+  if (Src.empty()) return;
+
+  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+    const Value *V = Src[i].V;
+    ExtensionKind Extension = Src[i].Extension;
+    int64_t Scale = Src[i].Scale;
+
+    // Find V in Dest. This is N^2, but pointer indices almost never have more
+    // than a few variable indexes.
+    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+      if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+      // If we found it, subtract off Scale V's from the entry in Dest. If it
+      // goes to zero, remove the entry.
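+      // For example, if Dest holds {V = x, Scale = 20} and Src holds
+      // {V = x, Scale = 4}, the Dest entry becomes {V = x, Scale = 16};
+      // when the scales are equal the entry cancels out entirely.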
+      if (Dest[j].Scale != Scale)
+        Dest[j].Scale -= Scale;
+      else
+        Dest.erase(Dest.begin()+j);
+      Scale = 0;
+      break;
+    }
+
+    // If we didn't consume this entry, add it to the end of the Dest list.
+    if (Scale) {
+      VariableGEPIndex Entry = { V, Extension, -Scale };
+      Dest.push_back(Entry);
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+  if (const Instruction *inst = dyn_cast<Instruction>(V))
+    return inst->getParent()->getParent();
+
+  if (const Argument *arg = dyn_cast<Argument>(V))
+    return arg->getParent();
+
+  return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+  const Function *F1 = getParent(O1);
+  const Function *F2 = getParent(O2);
+
+  return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+namespace {
+  /// BasicAliasAnalysis - This is the default alias analysis implementation.
+  /// Because it doesn't chain to a previous alias analysis (like -no-aa), it
+  /// derives from the NoAA class.
+  struct BasicAliasAnalysis : public NoAA {
+    static char ID; // Class identification, replacement for typeinfo
+    BasicAliasAnalysis() : NoAA(ID) {}
+
+    virtual AliasResult alias(const Value *V1, unsigned V1Size,
+                              const Value *V2, unsigned V2Size) {
+      assert(Visited.empty() && "Visited must be cleared after use!");
+      assert(notDifferentParent(V1, V2) &&
+             "BasicAliasAnalysis doesn't support interprocedural queries.");
+      AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+      Visited.clear();
+      return Alias;
+    }
+
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Value *P, unsigned Size);
+
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2) {
+      // The AliasAnalysis base class has some smarts; let's use them.
+      return AliasAnalysis::getModRefInfo(CS1, CS2);
+    }
+
+    /// pointsToConstantMemory - Chase pointers until we find a constant
+    /// global or determine that we cannot.
+    virtual bool pointsToConstantMemory(const Value *P);
+
+    /// getModRefBehavior - Return the behavior when calling the given
+    /// call site.
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+    /// getModRefBehavior - Return the behavior when calling the given function.
+    /// For use when the call site is not known.
+    virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(const void *ID) {
+      if (ID == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+  private:
+    // Visited - Track instructions visited by aliasPHI(), aliasSelect(),
+    // and aliasGEP().
+    SmallPtrSet<const Value*, 16> Visited;
+
+    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
+    // instruction against another.
+    AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size,
+                         const Value *V2, unsigned V2Size,
+                         const Value *UnderlyingV1, const Value *UnderlyingV2);
+
+    // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
+    // instruction against another.
+    AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
+                         const Value *V2, unsigned V2Size);
+
+    /// aliasSelect - Disambiguate a Select instruction against another value.
+    AliasResult aliasSelect(const SelectInst *SI, unsigned SISize,
+                            const Value *V2, unsigned V2Size);
+
+    AliasResult aliasCheck(const Value *V1, unsigned V1Size,
+                           const Value *V2, unsigned V2Size);
+  };
+} // End of anonymous namespace
+
+// Register this pass...
+char BasicAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+                   "Basic Alias Analysis (default AA impl)",
+                   false, true, true);
+
+ImmutablePass *llvm::createBasicAliasAnalysisPass() {
+  return new BasicAliasAnalysis();
+}
+
+
+/// pointsToConstantMemory - Chase pointers until we find a constant global
+/// or determine that we cannot.
+bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
+  if (const GlobalVariable *GV =
+        dyn_cast<GlobalVariable>(P->getUnderlyingObject()))
+    // Note: this doesn't require GV to be "ODR" because it isn't legal for a
+    // global to be marked constant in some modules and non-constant in others.
+    // GV may even be a declaration, not a definition.
+    return GV->isConstant();
+
+  return NoAA::pointsToConstantMemory(P);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+  if (CS.doesNotAccessMemory())
+    // Can't do better than this.
+    return DoesNotAccessMemory;
+
+  ModRefBehavior Min = UnknownModRefBehavior;
+
+  // If the callsite knows it only reads memory, don't return worse
+  // than that.
+  if (CS.onlyReadsMemory())
+    Min = OnlyReadsMemory;
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+  return std::min(AliasAnalysis::getModRefBehavior(CS), Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+  if (F->doesNotAccessMemory())
+    // Can't do better than this.
+    return DoesNotAccessMemory;
+  if (F->onlyReadsMemory())
+    return OnlyReadsMemory;
+  if (unsigned id = F->getIntrinsicID())
+    return getIntrinsicModRefBehavior(id);
+
+  return NoAA::getModRefBehavior(F);
+}
+
+/// getModRefInfo - Check to see if the specified callsite can clobber the
+/// specified memory object. Since we only look at local properties of this
+/// function, we really can't say much about this query. We do, however, use
+/// simple "address taken" analysis on local objects.
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                                  const Value *P, unsigned Size) {
+  assert(notDifferentParent(CS.getInstruction(), P) &&
+         "AliasAnalysis query involving multiple functions!");
+
+  const Value *Object = P->getUnderlyingObject();
+
+  // If this is a tail call and P points to a stack location, we know that
+  // the tail call cannot access or modify the local stack.
+  // We cannot exclude byval arguments here; these belong to the caller of
+  // the current function, not to the current function, and a tail callee
+  // may reference them.
+  if (isa<AllocaInst>(Object))
+    if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+      if (CI->isTailCall())
+        return NoModRef;
+
+  // If the pointer is to a locally allocated object that does not escape,
+  // then the call cannot mod/ref the pointer unless the call takes the pointer
+  // as an argument, and itself doesn't capture it.
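+  // For example (an illustrative sketch): given
+  //   %p = alloca i32
+  //   call void @f()      ; %p is not passed to @f
+  // a non-escaping %p cannot be visible to @f, so the call is NoModRef with
+  // respect to %p.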
+  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+      isNonEscapingLocalObject(Object)) {
+    bool PassedAsArg = false;
+    unsigned ArgNo = 0;
+    for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+         CI != CE; ++CI, ++ArgNo) {
+      // Only look at the no-capture pointer arguments.
+      if (!(*CI)->getType()->isPointerTy() ||
+          !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
+        continue;
+
+      // If this is a no-capture pointer argument, see if we can tell that it
+      // is impossible to alias the pointer we're checking. If not, we have to
+      // assume that the call could touch the pointer, even though it doesn't
+      // escape.
+      if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) {
+        PassedAsArg = true;
+        break;
+      }
+    }
+
+    if (!PassedAsArg)
+      return NoModRef;
+  }
+
+  // Finally, handle specific knowledge of intrinsics.
+  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+  if (II != 0)
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove: {
+      unsigned Len = UnknownSize;
+      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+        Len = LenCI->getZExtValue();
+      Value *Dest = II->getArgOperand(0);
+      Value *Src = II->getArgOperand(1);
+      if (isNoAlias(Dest, Len, P, Size)) {
+        if (isNoAlias(Src, Len, P, Size))
+          return NoModRef;
+        return Ref;
+      }
+      break;
+    }
+    case Intrinsic::memset:
+      // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+      // will handle it for the variable length case.
+      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+        unsigned Len = LenCI->getZExtValue();
+        Value *Dest = II->getArgOperand(0);
+        if (isNoAlias(Dest, Len, P, Size))
+          return NoModRef;
+      }
+      break;
+    case Intrinsic::atomic_cmp_swap:
+    case Intrinsic::atomic_swap:
+    case Intrinsic::atomic_load_add:
+    case Intrinsic::atomic_load_sub:
+    case Intrinsic::atomic_load_and:
+    case Intrinsic::atomic_load_nand:
+    case Intrinsic::atomic_load_or:
+    case Intrinsic::atomic_load_xor:
+    case Intrinsic::atomic_load_max:
+    case Intrinsic::atomic_load_min:
+    case Intrinsic::atomic_load_umax:
+    case Intrinsic::atomic_load_umin:
+      if (TD) {
+        Value *Op1 = II->getArgOperand(0);
+        unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+        if (isNoAlias(Op1, Op1Size, P, Size))
+          return NoModRef;
+      }
+      break;
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start: {
+      unsigned PtrSize =
+        cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+      if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
+        return NoModRef;
+      break;
+    }
+    case Intrinsic::invariant_end: {
+      unsigned PtrSize =
+        cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+      if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
+        return NoModRef;
+      break;
+    }
+    }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+  return AliasAnalysis::getModRefInfo(CS, P, Size);
+}
+
+
+/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+/// against another pointer. We know that V1 is a GEP, but we don't know
+/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(),
+/// UnderlyingV2 is the same for V2.
+///
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
+                             const Value *V2, unsigned V2Size,
+                             const Value *UnderlyingV1,
+                             const Value *UnderlyingV2) {
+  // If this GEP has been visited before, we're on a use-def cycle.
+  // Such cycles are only valid when PHI nodes are involved or in unreachable
+  // code. The visitPHI function catches cycles containing PHIs, but there
+  // could still be a cycle without PHIs in unreachable code.
+  if (!Visited.insert(GEP1))
+    return MayAlias;
+
+  int64_t GEP1BaseOffset;
+  SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+
+  // If we have two gep instructions with must-alias'ing base pointers, figure
+  // out if the indexes to the GEP tell us anything about the derived pointer.
+  if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
+    // Do the base pointers alias?
+    AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize,
+                                       UnderlyingV2, UnknownSize);
+
+    // If we get a No or May, then return it immediately, no amount of analysis
+    // will improve this situation.
+    if (BaseAlias != MustAlias) return BaseAlias;
+
+    // Otherwise, we have a MustAlias. Since the base pointers alias each other
+    // exactly, see if the computed offset from the common pointer tells us
+    // about the relation of the resulting pointer.
+    const Value *GEP1BasePtr =
+      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+    int64_t GEP2BaseOffset;
+    SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
+    const Value *GEP2BasePtr =
+      DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+
+    // If DecomposeGEPExpression isn't able to look all the way through the
+    // addressing operation, we must not have TD and this is too complex for us
+    // to handle without it.
+    if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
+      assert(TD == 0 &&
+             "DecomposeGEPExpression and getUnderlyingObject disagree!");
+      return MayAlias;
+    }
+
+    // Subtract the GEP2 pointer from the GEP1 pointer to find out their
+    // symbolic difference.
+    GEP1BaseOffset -= GEP2BaseOffset;
+    GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
+
+  } else {
+    // Check to see if these two pointers are related by the getelementptr
+    // instruction. If one pointer is a GEP with a non-zero index of the other
+    // pointer, we know they cannot alias.
+
+    // If both accesses are unknown size, we can't do anything useful here.
+    if (V1Size == UnknownSize && V2Size == UnknownSize)
+      return MayAlias;
+
+    AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size);
+    if (R != MustAlias)
+      // If V2 may alias the GEP base pointer, conservatively return MayAlias.
+      // If V2 is known not to alias the GEP base pointer, then the two values
+      // cannot alias per GEP semantics: "A pointer value formed from a
+      // getelementptr instruction is associated with the addresses associated
+      // with the first operand of the getelementptr".
+      return R;
+
+    const Value *GEP1BasePtr =
+      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+
+    // If DecomposeGEPExpression isn't able to look all the way through the
+    // addressing operation, we must not have TD and this is too complex for us
+    // to handle without it.
+    if (GEP1BasePtr != UnderlyingV1) {
+      assert(TD == 0 &&
+             "DecomposeGEPExpression and getUnderlyingObject disagree!");
+      return MayAlias;
+    }
+  }
+
+  // In the two-GEP case, if there is no difference in the offsets of the
+  // computed pointers, the resultant pointers are a must alias. This
+  // happens when we have two lexically identical GEPs (for example).
+  //
+  // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2
+  // must-aliases the GEP, the end result is a must alias also.
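+  // For example, "getelementptr %P, i32 0, i32 0" adds no offset at all, so
+  // it must-aliases anything that must-aliases %P.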
+  if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
+    return MustAlias;
+
+  // If we have a known constant offset, see if this offset is larger than the
+  // access size being queried. If so, and if no variable indices can remove
+  // pieces of this constant, then we know we have a no-alias. For example,
+  // &A[100] != &A.
+
+  // In order to handle cases like &A[100][i] where i is an out of range
+  // subscript, we have to ignore all constant offset pieces that are a multiple
+  // of a scaled index. Do this by removing constant offsets that are a
+  // multiple of any of our variable indices. This allows us to transform
+  // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
+  // provides an offset of 4 bytes (assuming a <= 4 byte access).
+  for (unsigned i = 0, e = GEP1VariableIndices.size();
+       i != e && GEP1BaseOffset; ++i)
+    if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+      GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
+
+  // If our known offset is bigger than the access size, we know we don't have
+  // an alias.
+  if (GEP1BaseOffset) {
+    if (GEP1BaseOffset >= (int64_t)V2Size ||
+        GEP1BaseOffset <= -(int64_t)V1Size)
+      return NoAlias;
+  }
+
+  return MayAlias;
+}
+
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
+/// instruction against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
+                                const Value *V2, unsigned V2Size) {
+  // If this select has been visited before, we're on a use-def cycle.
+  // Such cycles are only valid when PHI nodes are involved or in unreachable
+  // code. The visitPHI function catches cycles containing PHIs, but there
+  // could still be a cycle without PHIs in unreachable code.
+  if (!Visited.insert(SI))
+    return MayAlias;
+
+  // If the values are Selects with the same condition, we can do a more precise
+  // check: just check for aliases between the values on corresponding arms.
+  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
+    if (SI->getCondition() == SI2->getCondition()) {
+      AliasResult Alias =
+        aliasCheck(SI->getTrueValue(), SISize,
+                   SI2->getTrueValue(), V2Size);
+      if (Alias == MayAlias)
+        return MayAlias;
+      AliasResult ThisAlias =
+        aliasCheck(SI->getFalseValue(), SISize,
+                   SI2->getFalseValue(), V2Size);
+      if (ThisAlias != Alias)
+        return MayAlias;
+      return Alias;
+    }
+
+  // If both arms of the Select node NoAlias or MustAlias V2, then return
+  // NoAlias / MustAlias. Otherwise, return MayAlias.
+  AliasResult Alias =
+    aliasCheck(V2, V2Size, SI->getTrueValue(), SISize);
+  if (Alias == MayAlias)
+    return MayAlias;
+
+  // If V2 is visited, the recursive case will have been caught in the
+  // above aliasCheck call, so these subsequent calls to aliasCheck
+  // don't need to assume that V2 is being visited recursively.
+  Visited.erase(V2);
+
+  AliasResult ThisAlias =
+    aliasCheck(V2, V2Size, SI->getFalseValue(), SISize);
+  if (ThisAlias != Alias)
+    return MayAlias;
+  return Alias;
+}
+
+// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+// against another.
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
+                             const Value *V2, unsigned V2Size) {
+  // If the PHI node has already been visited, avoid recursing any further.
+  if (!Visited.insert(PN))
+    return MayAlias;
+
+  // If the values are PHIs in the same block, we can do a more precise
+  // as well as efficient check: just check for aliases between the values
+  // on corresponding edges.
+  if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
+    if (PN2->getParent() == PN->getParent()) {
+      AliasResult Alias =
+        aliasCheck(PN->getIncomingValue(0), PNSize,
+                   PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)),
+                   V2Size);
+      if (Alias == MayAlias)
+        return MayAlias;
+      for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+        AliasResult ThisAlias =
+          aliasCheck(PN->getIncomingValue(i), PNSize,
+                     PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+                     V2Size);
+        if (ThisAlias != Alias)
+          return MayAlias;
+      }
+      return Alias;
+    }
+
+  SmallPtrSet<Value*, 4> UniqueSrc;
+  SmallVector<Value*, 4> V1Srcs;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *PV1 = PN->getIncomingValue(i);
+    if (isa<PHINode>(PV1))
+      // If any of the sources is itself a PHI, return MayAlias conservatively
+      // to avoid compile time explosion. The worst possible case is if both
+      // sides are PHI nodes, in which case this is O(m x n) time, where 'm'
+      // and 'n' are the number of PHI sources.
+      return MayAlias;
+    if (UniqueSrc.insert(PV1))
+      V1Srcs.push_back(PV1);
+  }
+
+  AliasResult Alias = aliasCheck(V2, V2Size, V1Srcs[0], PNSize);
+  // Early exit if the check of the first PHI source against V2 is MayAlias.
+  // Other results are not possible.
+  if (Alias == MayAlias)
+    return MayAlias;
+
+  // If all sources of the PHI node NoAlias or MustAlias V2, then return
+  // NoAlias / MustAlias. Otherwise, return MayAlias.
+  for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
+    Value *V = V1Srcs[i];
+
+    // If V2 is visited, the recursive case will have been caught in the
+    // above aliasCheck call, so these subsequent calls to aliasCheck
+    // don't need to assume that V2 is being visited recursively.
+    Visited.erase(V2);
+
+    AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
+    if (ThisAlias != Alias || ThisAlias == MayAlias)
+      return MayAlias;
+  }
+
+  return Alias;
+}
+
+// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
+// such as array references.
+//
+AliasAnalysis::AliasResult
+BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
+                               const Value *V2, unsigned V2Size) {
+  // If either of the memory references is empty, it doesn't matter what the
+  // pointer values are.
+  if (V1Size == 0 || V2Size == 0)
+    return NoAlias;
+
+  // Strip off any casts if they exist.
+  V1 = V1->stripPointerCasts();
+  V2 = V2->stripPointerCasts();
+
+  // Are we checking for alias of the same value?
+  if (V1 == V2) return MustAlias;
+
+  if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
+    return NoAlias;  // Scalars cannot alias each other
+
+  // Figure out what objects these things are pointing to if we can.
+  const Value *O1 = V1->getUnderlyingObject();
+  const Value *O2 = V2->getUnderlyingObject();
+
+  // Null values in the default address space don't point to any object, so they
+  // don't alias any other pointer.
+  if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+    if (CPN->getType()->getAddressSpace() == 0)
+      return NoAlias;
+  if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+    if (CPN->getType()->getAddressSpace() == 0)
+      return NoAlias;
+
+  if (O1 != O2) {
+    // If V1/V2 point to two different objects, we know that we have no alias.
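+    // For example, two distinct allocas, or an alloca and a global variable,
+    // can never overlap in memory.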
+    if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
+      return NoAlias;
+
+    // Constant pointers can't alias non-constant isIdentifiedObject objects.
+    if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+        (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+      return NoAlias;
+
+    // Arguments can't alias with local allocations or noalias calls
+    // in the same function.
+    if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+         (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+      return NoAlias;
+
+    // Most objects can't alias null.
+    if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+        (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+      return NoAlias;
+
+    // If one pointer is the result of a call/invoke or load and the other is a
+    // non-escaping local object within the same function, then we know the
+    // object couldn't escape to a point where the call could return it.
+    //
+    // Note that if the pointers are in different functions, there are a
+    // variety of complications. A call with a nocapture argument may still
+    // temporarily store the nocapture argument's value in a temporary memory
+    // location if that memory location doesn't escape. Or it may pass a
+    // nocapture value to other functions as long as they don't capture it.
+    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+      return NoAlias;
+    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+      return NoAlias;
+  }
+
+  // If the size of one access is larger than the entire object on the other
+  // side, then we know such behavior is undefined and can assume no alias.
+  if (TD)
+    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+      return NoAlias;
+
+  // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
+  // GEP can't simplify, we don't even look at the PHI cases.
+  if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+    std::swap(O1, O2);
+  }
+  if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1))
+    return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2);
+
+  if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+  }
+  if (const PHINode *PN = dyn_cast<PHINode>(V1))
+    return aliasPHI(PN, V1Size, V2, V2Size);
+
+  if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+  }
+  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
+    return aliasSelect(S1, V1Size, V2, V2Size);
+
+  return NoAA::alias(V1, V1Size, V2, V2Size);
+}
+
+// Make sure that anything that uses AliasAnalysis pulls in this file.
+DEFINING_FILE_FOR(BasicAliasAnalysis)
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 0000000..617a362
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,158 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a '-dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
+// The other main feature of this file is that it implements the
+// Function::viewCFG method, which is useful for debugging passes which operate
+// on the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFGPrinter.h"
+
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+  struct CFGViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGViewer() : FunctionPass(ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      F.viewCFG();
+      return false;
+    }
+
+    void print(raw_ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGViewer::ID = 0;
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true);
+
+namespace {
+  struct CFGOnlyViewer : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGOnlyViewer() : FunctionPass(ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      F.viewCFGOnly();
+      return false;
+    }
+
+    void print(raw_ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGOnlyViewer::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+                "View CFG of function (with no function bodies)", false, true);
+
+namespace {
+  struct CFGPrinter : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGPrinter() : FunctionPass(ID) {}
+    explicit CFGPrinter(char &pid) : FunctionPass(pid) {}
+
+    virtual bool runOnFunction(Function &F) {
+      std::string Filename = "cfg." + F.getNameStr() + ".dot";
+      errs() << "Writing '" << Filename << "'...";
+
+      std::string ErrorInfo;
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+      if (ErrorInfo.empty())
+        WriteGraph(File, (const Function*)&F);
+      else
+        errs() << " error opening file for writing!";
+      errs() << "\n";
+      return false;
+    }
+
+    void print(raw_ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGPrinter::ID = 0;
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+                false, true);
+
+namespace {
+  struct CFGOnlyPrinter : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    CFGOnlyPrinter() : FunctionPass(ID) {}
+    explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {}
+    virtual bool runOnFunction(Function &F) {
+      std::string Filename = "cfg." + F.getNameStr() + ".dot";
+      errs() << "Writing '" << Filename << "'...";
+
+      std::string ErrorInfo;
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+      if (ErrorInfo.empty())
+        WriteGraph(File, (const Function*)&F, true);
+      else
+        errs() << " error opening file for writing!";
+      errs() << "\n";
+      return false;
+    }
+    void print(raw_ostream &OS, const Module* = 0) const {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+}
+
+char CFGOnlyPrinter::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+                "Print CFG of function to 'dot' file (with no function bodies)",
+                false, true);
+
+/// viewCFG - This function is meant for use from the debugger. You can just
+/// say 'call F->viewCFG()' and a ghostview window should pop up from the
+/// program, displaying the CFG of the current function. This depends on there
+/// being a 'dot' and 'gv' program in your path.
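+///
+/// For example (an illustrative sketch), from a gdb session one might run:
+///   (gdb) call F->viewCFG()
+/// where F is assumed to be a Function* in scope at the current breakpoint.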
+///
+void Function::viewCFG() const {
+  ViewGraph(this, "cfg" + getNameStr());
+}
+
+/// viewCFGOnly - This function is meant for use from the debugger. It works
+/// just like viewCFG, but it does not include the contents of basic blocks
+/// into the nodes, just the label. If you are only interested in the CFG,
+/// this can make the graph smaller.
+///
+void Function::viewCFGOnly() const {
+  ViewGraph(this, "cfg" + getNameStr(), true);
+}
+
+FunctionPass *llvm::createCFGPrinterPass () {
+  return new CFGPrinter();
+}
+
+FunctionPass *llvm::createCFGOnlyPrinterPass () {
+  return new CFGOnlyPrinter();
+}
+
diff --git a/contrib/llvm/lib/Analysis/CMakeLists.txt b/contrib/llvm/lib/Analysis/CMakeLists.txt
new file mode 100644
index 0000000..6a2ab68
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CMakeLists.txt
@@ -0,0 +1,53 @@
+add_llvm_library(LLVMAnalysis
+  AliasAnalysis.cpp
+  AliasAnalysisCounter.cpp
+  AliasAnalysisEvaluator.cpp
+  AliasDebugger.cpp
+  AliasSetTracker.cpp
+  Analysis.cpp
+  BasicAliasAnalysis.cpp
+  CFGPrinter.cpp
+  CaptureTracking.cpp
+  ConstantFolding.cpp
+  DbgInfoPrinter.cpp
+  DebugInfo.cpp
+  DomPrinter.cpp
+  IVUsers.cpp
+  InlineCost.cpp
+  InstCount.cpp
+  InstructionSimplify.cpp
+  Interval.cpp
+  IntervalPartition.cpp
+  LazyValueInfo.cpp
+  LibCallAliasAnalysis.cpp
+  LibCallSemantics.cpp
+  Lint.cpp
+  LiveValues.cpp
+  Loads.cpp
+  LoopDependenceAnalysis.cpp
+  LoopInfo.cpp
+  LoopPass.cpp
+  MemoryBuiltins.cpp
+  MemoryDependenceAnalysis.cpp
+  ModuleDebugInfoPrinter.cpp
+  PHITransAddr.cpp
+  PointerTracking.cpp
+  PostDominators.cpp
+  ProfileEstimatorPass.cpp
+  ProfileInfo.cpp
+  ProfileInfoLoader.cpp
+  ProfileInfoLoaderPass.cpp
+  ProfileVerifierPass.cpp
+  RegionInfo.cpp
+  RegionPrinter.cpp
+  ScalarEvolution.cpp
+  ScalarEvolutionAliasAnalysis.cpp
+  ScalarEvolutionExpander.cpp
+  ScalarEvolutionNormalization.cpp
+  SparsePropagation.cpp
+  Trace.cpp
+  TypeBasedAliasAnalysis.cpp
+  ValueTracking.cpp
+  )
+
+target_link_libraries (LLVMAnalysis LLVMSupport)
diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
new file mode 100644
index 0000000..90eae20
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp
@@ -0,0 +1,144 @@
+//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+// A pointer value is captured if the function makes a copy of any part of the
+// pointer that outlives the call. Not being captured means, more or less, that
+// the pointer is only dereferenced and not stored in a global. Returning part
+// of the pointer as the function return value may or may not count as capturing
+// the pointer, depending on the context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Instructions.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+/// PointerMayBeCaptured - Return true if this pointer value may be captured
+/// by the enclosing function (which is required to exist). This routine can
+/// be expensive, so consider caching the results. The boolean ReturnCaptures
+/// specifies whether returning the value (or part of it) from the function
+/// counts as capturing it or not. The boolean StoreCaptures specifies whether
+/// storing the value (or part of it) into memory anywhere automatically
+/// counts as capturing it or not.
+bool llvm::PointerMayBeCaptured(const Value *V,
+                                bool ReturnCaptures, bool StoreCaptures) {
+  assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
+  SmallVector<Use*, Threshold> Worklist;
+  SmallSet<Use*, Threshold> Visited;
+  int Count = 0;
+
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    // If there are lots of uses, conservatively say that the value
+    // is captured to avoid taking too much compile time.
+    if (Count++ >= Threshold)
+      return true;
+
+    Use *U = &UI.getUse();
+    Visited.insert(U);
+    Worklist.push_back(U);
+  }
+
+  while (!Worklist.empty()) {
+    Use *U = Worklist.pop_back_val();
+    Instruction *I = cast<Instruction>(U->getUser());
+    V = U->get();
+
+    switch (I->getOpcode()) {
+    case Instruction::Call:
+    case Instruction::Invoke: {
+      CallSite CS(I);
+      // Not captured if the callee is readonly, doesn't return a copy through
+      // its return value and doesn't unwind (a readonly function can leak bits
+      // by throwing an exception or not depending on the input value).
+      if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+        break;
+
+      // Not captured if only passed via 'nocapture' arguments. Note that
+      // calling a function pointer does not in itself cause the pointer to
+      // be captured. This is a subtle point considering that (for example)
+      // the callee might return its own address. It is analogous to saying
+      // that loading a value from a pointer does not cause the pointer to be
+      // captured, even though the loaded value might be the pointer itself
+      // (think of self-referential objects).
+      CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+      for (CallSite::arg_iterator A = B; A != E; ++A)
+        if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+          // The parameter is not marked 'nocapture' - captured.
+          return true;
+      // Only passed via 'nocapture' arguments, or is the called function - not
+      // captured.
+      break;
+    }
+    case Instruction::Load:
+      // Loading from a pointer does not cause it to be captured.
+      break;
+    case Instruction::Ret:
+      if (ReturnCaptures)
+        return true;
+      break;
+    case Instruction::Store:
+      if (V == I->getOperand(0))
+        // Stored the pointer - conservatively assume it may be captured.
+        // TODO: If StoreCaptures is not true, we could do fancy analysis
+        // to determine whether this store is not actually an escape point.
+        // In that case, BasicAliasAnalysis should be updated as well to
+        // take advantage of this.
+        return true;
+      // Storing to the pointee does not cause the pointer to be captured.
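+      // For example, "store i32 0, i32* %p" does not capture %p, whereas
+      // "store i32* %p, i32** %q" stores the pointer itself and is treated
+      // as a capture.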
+ break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // The original value is not captured via this if the new value isn't. + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + if (Visited.insert(U)) + Worklist.push_back(U); + } + break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (isNoAliasCall(V->stripPointerCasts())) + if (ConstantPointerNull *CPN = + dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + break; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + return true; + default: + // Something else - be conservative and say it is captured. + return true; + } + } + + // All uses examined - not captured. + return false; +} diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp new file mode 100644 index 0000000..0bf7967 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -0,0 +1,1288 @@ +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include <cerrno> +#include <cmath> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Constant Folding internal helper functions +//===----------------------------------------------------------------------===// + +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// TargetData. This always returns a non-null constant, but it may be a +/// ConstantExpr if unfoldable. +static Constant *FoldBitCast(Constant *C, const Type *DestTy, + const TargetData &TD) { + + // This only handles casts to vectors currently. + const VectorType *DestVTy = dyn_cast<VectorType>(DestTy); + if (DestVTy == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> + // vector so the code below can handle it uniformly. + if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { + Constant *Ops = C; // don't take the address of C! 
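+    // ConstantVector::get expects a pointer to an array of operands; the
+    // local copy above serves as a one-element array for the <1 x scalar>
+    // vector being built here.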
+    return FoldBitCast(ConstantVector::get(&Ops, 1), DestTy, TD);
+  }
+
+  // If this is a bitcast from constant vector -> vector, fold it.
+  ConstantVector *CV = dyn_cast<ConstantVector>(C);
+  if (CV == 0)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  // If the element types match, VMCore can fold it.
+  unsigned NumDstElt = DestVTy->getNumElements();
+  unsigned NumSrcElt = CV->getNumOperands();
+  if (NumDstElt == NumSrcElt)
+    return ConstantExpr::getBitCast(C, DestTy);
+
+  const Type *SrcEltTy = CV->getType()->getElementType();
+  const Type *DstEltTy = DestVTy->getElementType();
+
+  // Otherwise, we're changing the number of elements in a vector, which
+  // requires endianness information to do the right thing. For example,
+  //    bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+  // folds to (little endian):
+  //    <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+  // and to (big endian):
+  //    <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+
+  // First things first: we only want to think about integers here, so if
+  // we have something in FP form, recast it as integer.
+  if (DstEltTy->isFloatingPointTy()) {
+    // Fold to a vector of integers with same size as our FP type.
+    unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
+    const Type *DestIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
+    // Recursively handle this integer conversion, if possible.
+    C = FoldBitCast(C, DestIVTy, TD);
+    if (!C) return ConstantExpr::getBitCast(C, DestTy);
+
+    // Finally, VMCore can handle this now that #elts line up.
+    return ConstantExpr::getBitCast(C, DestTy);
+  }
+
+  // Okay, we know the destination is integer, if the input is FP, convert
+  // it to integer first.
+  if (SrcEltTy->isFloatingPointTy()) {
+    unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+    const Type *SrcIVTy =
+      VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
+    // Ask VMCore to do the conversion now that #elts line up.
+    C = ConstantExpr::getBitCast(C, SrcIVTy);
+    CV = dyn_cast<ConstantVector>(C);
+    if (!CV)  // If VMCore wasn't able to fold it, bail out.
+      return C;
+  }
+
+  // Now we know that the input and output vectors are both integer vectors
+  // of the same size, and that their #elements is not the same. Do the
+  // conversion here, which depends on whether the input or output has
+  // more elements.
+  bool isLittleEndian = TD.isLittleEndian();
+
+  SmallVector<Constant*, 32> Result;
+  if (NumDstElt < NumSrcElt) {
+    // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
+    Constant *Zero = Constant::getNullValue(DstEltTy);
+    unsigned Ratio = NumSrcElt/NumDstElt;
+    unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
+    unsigned SrcElt = 0;
+    for (unsigned i = 0; i != NumDstElt; ++i) {
+      // Build each element of the result.
+      Constant *Elt = Zero;
+      unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
+      for (unsigned j = 0; j != Ratio; ++j) {
+        Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+        if (!Src)  // Reject constantexpr elements.
+          return ConstantExpr::getBitCast(C, DestTy);
+
+        // Zero extend the element to the right size.
+        Src = ConstantExpr::getZExt(Src, Elt->getType());
+
+        // Shift it to the right place, depending on endianness.
+        Src = ConstantExpr::getShl(Src,
+                                   ConstantInt::get(Src->getType(), ShiftAmt));
+        ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+
+        // Mix it in.
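+        // For example, bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to
+        // <2 x i64>) on a little-endian target builds element 0 as
+        // 0 | (1 << 32) == 0x0000000100000000.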
+ Elt = ConstantExpr::getOr(Elt, Src); + } + Result.push_back(Elt); + } + } else { + // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); + } + } + } + + return ConstantVector::get(Result.data(), Result.size()); +} + + +/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset +/// from a global, return the global and the constant. Because of +/// constantexprs, this function is recursive. +static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, + int64_t &Offset, const TargetData &TD) { + // Trivial case, constant is the global. + if ((GV = dyn_cast<GlobalValue>(C))) { + Offset = 0; + return true; + } + + // Otherwise, if this isn't a constant expr, bail out. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return false; + + // Look through ptr->int and ptr->ptr casts. + if (CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::BitCast) + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + if (CE->getOpcode() == Instruction::GetElementPtr) { + // Cannot compute this if the element type of the pointer is missing size + // info. + if (!cast<PointerType>(CE->getOperand(0)->getType()) + ->getElementType()->isSized()) + return false; + + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. + gep_type_iterator GTI = gep_type_begin(CE); + for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); + i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast<ConstantInt>(*i); + if (!CI) return false; // Index isn't a simple constant? + if (CI->isZero()) continue; // Not adding anything. + + if (const StructType *ST = dyn_cast<StructType>(*GTI)) { + // N = N + Offset + Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); + } else { + const SequentialType *SQT = cast<SequentialType>(*GTI); + Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); + } + } + return true; + } + + return false; +} + +/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the +/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the +/// pointer to copy results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. TD is the target data. 
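+///
+/// For example (an illustrative sketch): reading 4 bytes at ByteOffset 0 of
+/// an i64 initializer 0x0807060504030201 fills CurPtr with the low-order
+/// bytes 01 02 03 04, matching a little-endian in-memory layout.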
+static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, + unsigned char *CurPtr, unsigned BytesLeft, + const TargetData &TD) { + assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + "Out of range access"); + + // If this element is zero or undefined, we can just return since *CurPtr is + // zero initialized. + if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) + return true; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + if (CI->getBitWidth() > 64 || + (CI->getBitWidth() & 7) != 0) + return false; + + uint64_t Val = CI->getZExtValue(); + unsigned IntBytes = unsigned(CI->getBitWidth()/8); + + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { + CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + ++ByteOffset; + } + return true; + } + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + if (CFP->getType()->isDoubleTy()) { + C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isFloatTy()){ + C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + return false; + } + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + const StructLayout *SL = TD.getStructLayout(CS->getType()); + unsigned Index = SL->getElementContainingOffset(ByteOffset); + uint64_t CurEltOffset = SL->getElementOffset(Index); + ByteOffset -= CurEltOffset; + + while (1) { + // If the element access is to the element itself and not to tail padding, + // read the bytes from the element. + uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + + if (ByteOffset < EltSize && + !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, + BytesLeft, TD)) + return false; + + ++Index; + + // Check to see if we read from the last struct element, if so we're done. + if (Index == CS->getType()->getNumElements()) + return true; + + // If we read all of the bytes we needed from this element we're done. + uint64_t NextEltOffset = SL->getElementOffset(Index); + + if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + return true; + + // Move to the next element of the struct. + CurPtr += NextEltOffset-CurEltOffset-ByteOffset; + BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + ByteOffset = 0; + CurEltOffset = NextEltOffset; + } + // not reached. + } + + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CA->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CV->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + // Otherwise, unknown initializer type. 
+ return false; +} + +static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, + const TargetData &TD) { + const Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + + // If this isn't an integer load we can't fold it directly. + if (!IntType) { + // If this is a float/double load, we can try folding it as an int32/64 load + // and then bitcast the result. This can be useful for union cases. Note + // that address spaces don't matter here since we're not going to result in + // an actual new load. + const Type *MapTy; + if (LoadTy->isFloatTy()) + MapTy = Type::getInt32PtrTy(C->getContext()); + else if (LoadTy->isDoubleTy()) + MapTy = Type::getInt64PtrTy(C->getContext()); + else if (LoadTy->isVectorTy()) { + MapTy = IntegerType::get(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy)); + MapTy = PointerType::getUnqual(MapTy); + } else + return 0; + + C = FoldBitCast(C, MapTy, TD); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) + return FoldBitCast(Res, LoadTy, TD); + return 0; + } + + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; + if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + + GlobalValue *GVal; + int64_t Offset; + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + return 0; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + !GV->getInitializer()->getType()->isSized()) + return 0; + + // If we're loading off the beginning of the global, some bytes may be valid, + // but we don't try to handle this. + if (Offset < 0) return 0; + + // If we're not accessing anything in this constant, the result is undefined. + if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + return UndefValue::get(IntType); + + unsigned char RawBytes[32] = {0}; + if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + BytesLoaded, TD)) + return 0; + + APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + + return ConstantInt::get(IntType->getContext(), ResultVal); +} + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. +Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, + const TargetData *TD) { + // First, try the easy cases: + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + + // If the loaded value isn't a constant expr, we can't handle it. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return 0; + + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) + return V; + } + + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. 
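+  // Illustrative example: an i32 load of the constant string "abc" (three
+  // characters plus the terminating nul, so the size check below holds)
+  // folds to the immediate 0x00636261 on a little-endian target.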
+  std::string Str;
+  if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
+    unsigned StrLen = Str.length();
+    const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
+    unsigned NumBits = Ty->getPrimitiveSizeInBits();
+    // Replace load with immediate integer if the result is an integer or fp
+    // value.
+    if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+        (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
+      APInt StrVal(NumBits, 0);
+      APInt SingleChar(NumBits, 0);
+      if (TD->isLittleEndian()) {
+        for (signed i = StrLen-1; i >= 0; i--) {
+          SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+          StrVal = (StrVal << 8) | SingleChar;
+        }
+      } else {
+        for (unsigned i = 0; i < StrLen; i++) {
+          SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
+          StrVal = (StrVal << 8) | SingleChar;
+        }
+        // Append the terminating NUL.
+        SingleChar = 0;
+        StrVal = (StrVal << 8) | SingleChar;
+      }
+
+      Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+      if (Ty->isFloatingPointTy())
+        Res = ConstantExpr::getBitCast(Res, Ty);
+      return Res;
+    }
+  }
+
+  // If this load comes from anywhere in a constant global, and if the global
+  // is all undef or zero, we know what it loads.
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getUnderlyingObject())) {
+    if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
+      const Type *ResTy = cast<PointerType>(C->getType())->getElementType();
+      if (GV->getInitializer()->isNullValue())
+        return Constant::getNullValue(ResTy);
+      if (isa<UndefValue>(GV->getInitializer()))
+        return UndefValue::get(ResTy);
+    }
+  }
+
+  // Try hard to fold loads from bitcasted strange and non-type-safe things. We
+  // currently don't do any of this for big endian systems. It can be
+  // generalized in the future if someone is interested.
+  if (TD && TD->isLittleEndian())
+    return FoldReinterpretLoadFromConstPtr(CE, *TD);
+  return 0;
+}
+
+static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
+  if (LI->isVolatile()) return 0;
+
+  if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
+    return ConstantFoldLoadFromConstPtr(C, TD);
+
+  return 0;
+}
+
+/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// Attempt to symbolically evaluate the result of a binary operator merging
+/// these together. If target data info is available, it is provided as TD,
+/// otherwise TD is null.
+static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
+                                           Constant *Op1, const TargetData *TD){
+  // TODO: folds that would help SROA but are not implemented here yet:
+  //   Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
+  //   Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
+  //   bits.
+
+  // If the constant expr is something like &A[123] - &A[4].f, fold this into a
+  // constant. This happens frequently when iterating over a global array.
+  if (Opc == Instruction::Sub && TD) {
+    GlobalValue *GV1, *GV2;
+    int64_t Offs1, Offs2;
+
+    if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD))
+      if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) &&
+          GV1 == GV2) {
+        // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
+        return ConstantInt::get(Op0->getType(), Offs1-Offs2);
+      }
+  }
+
+  return 0;
+}
+
+/// CastGEPIndices - If array indices are not pointer-sized integers,
+/// explicitly cast them so that they aren't implicitly cast by the
+/// getelementptr.
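+///
+/// For example (illustrative): with 64-bit pointers, an i32 array index is
+/// sign-extended to i64 here, so the GEP folding that runs afterwards only
+/// ever sees pointer-sized indices.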
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps,
+                                const Type *ResultTy,
+                                const TargetData *TD) {
+  if (!TD) return 0;
+  const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+
+  bool Any = false;
+  SmallVector<Constant*, 32> NewIdxs;
+  for (unsigned i = 1; i != NumOps; ++i) {
+    if ((i == 1 ||
+         !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
+                                        reinterpret_cast<Value *const *>(Ops+1),
+                                                            i-1))) &&
+        Ops[i]->getType() != IntPtrTy) {
+      Any = true;
+      NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+                                                                      true,
+                                                                      IntPtrTy,
+                                                                      true),
+                                              Ops[i], IntPtrTy));
+    } else
+      NewIdxs.push_back(Ops[i]);
+  }
+  if (!Any) return 0;
+
+  Constant *C =
+    ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size());
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  return C;
+}
+
+/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
+/// constant expression, do so.
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
+                                         const Type *ResultTy,
+                                         const TargetData *TD) {
+  Constant *Ptr = Ops[0];
+  if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+    return 0;
+
+  unsigned BitWidth =
+    TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
+
+  // If this is a constant expr gep that is effectively computing an
+  // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'.
+  for (unsigned i = 1; i != NumOps; ++i)
+    if (!isa<ConstantInt>(Ops[i]))
+      return 0;
+
+  APInt Offset = APInt(BitWidth,
+                       TD->getIndexedOffset(Ptr->getType(),
+                                            (Value**)Ops+1, NumOps-1));
+  Ptr = cast<Constant>(Ptr->stripPointerCasts());
+
+  // If this is a GEP of a GEP, fold it all into a single GEP.
+  while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+    SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+    // Do not try to incorporate the sub-GEP if some index is not a number.
+    bool AllConstantInt = true;
+    for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+      if (!isa<ConstantInt>(NestedOps[i])) {
+        AllConstantInt = false;
+        break;
+      }
+    if (!AllConstantInt)
+      break;
+
+    Ptr = cast<Constant>(GEP->getOperand(0));
+    Offset += APInt(BitWidth,
+                    TD->getIndexedOffset(Ptr->getType(),
+                                         (Value**)NestedOps.data(),
+                                         NestedOps.size()));
+    Ptr = cast<Constant>(Ptr->stripPointerCasts());
+  }
+
+  // If the base value for this address is a literal integer value, fold the
+  // getelementptr to the resulting integer value cast to the pointer type.
+  APInt BasePtr(BitWidth, 0);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+    if (CE->getOpcode() == Instruction::IntToPtr)
+      if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) {
+        BasePtr = Base->getValue();
+        BasePtr.zextOrTrunc(BitWidth);
+      }
+  if (Ptr->isNullValue() || BasePtr != 0) {
+    Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+    return ConstantExpr::getIntToPtr(C, ResultTy);
+  }
+
+  // Otherwise form a regular getelementptr. Recompute the indices so that
+  // we eliminate over-indexing of the notional static type array bounds.
+  // This makes it easy to determine if the getelementptr is "inbounds".
+  // Also, this helps GlobalOpt do SROA on GlobalVariables.
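+  // Illustrative example: for a pointer to [10 x i32] and a byte offset of
+  // 20, the loop below rebuilds the index list as (0, 5), addressing the
+  // sixth i32 element instead of a raw byte offset.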
+ const Type *Ty = Ptr->getType(); + SmallVector<Constant*, 32> NewIdxs; + do { + if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + if (ATy->isPointerTy()) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + if (ElemSize == 0) + return 0; + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()), + NewIdx)); + Ty = ATy->getElementType(); + } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + // Determine which field of the struct the offset points into. The + // getZExtValue is at least as safe as the StructLayout API because we + // know the offset is within the struct at this point. + const StructLayout &SL = *TD->getStructLayout(STy); + unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. + break; + } + } while (Ty != cast<PointerType>(ResultTy)->getElementType()); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = + ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + assert(cast<PointerType>(C->getType())->getElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. + if (Ty != cast<PointerType>(ResultTy)->getElementType()) + C = FoldBitCast(C, ResultTy, *TD); + + return C; +} + + + +//===----------------------------------------------------------------------===// +// Constant Folding public APIs +//===----------------------------------------------------------------------===// + + +/// ConstantFoldInstruction - Attempt to constant fold the specified +/// instruction. If successful, the constant result is returned, if not, null +/// is returned. Note that this function can only fail when attempting to fold +/// instructions like loads and stores, which have no constant expression form. +/// +Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { + if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (PN->getNumIncomingValues() == 0) + return UndefValue::get(PN->getType()); + + Constant *Result = dyn_cast<Constant>(PN->getIncomingValue(0)); + if (Result == 0) return 0; + + // Handle PHI nodes specially here... + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) != Result && PN->getIncomingValue(i) != PN) + return 0; // Not all the same incoming constants... + + // If we reach here, all incoming values are the same constant. + return Result; + } + + // Scan the operand list, checking to see if they are all constants, if so, + // hand off to ConstantFoldInstOperands. 
+  SmallVector<Constant*, 8> Ops;
+  for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
+    if (Constant *Op = dyn_cast<Constant>(*i))
+      Ops.push_back(Op);
+    else
+      return 0;  // All operands not constant!
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+                                           TD);
+
+  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
+    return ConstantFoldLoadInst(LI, TD);
+
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                  Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// returned, if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+                                               const TargetData *TD) {
+  SmallVector<Constant*, 8> Ops;
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
+       i != e; ++i) {
+    Constant *NewC = cast<Constant>(*i);
+    // Recursively fold the ConstantExpr's operands.
+    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
+      NewC = ConstantFoldConstantExpression(NewCE, TD);
+    Ops.push_back(NewC);
+  }
+
+  if (CE->isCompare())
+    return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+                                           TD);
+  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
+                                  Ops.data(), Ops.size(), TD);
+}
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified opcode and operands. If successful, the constant result is
+/// returned, if not, null is returned. Note that this function can fail when
+/// attempting to fold instructions like loads and stores, which have no
+/// constant expression form.
+///
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
+/// information, due to only being passed an opcode and operands. Constant
+/// folding using this function strips this information.
+///
+Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+                                         Constant* const* Ops, unsigned NumOps,
+                                         const TargetData *TD) {
+  // Handle easy binops first.
+  if (Instruction::isBinaryOp(Opcode)) {
+    if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+      if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+        return C;
+
+    return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
+  }
+
+  switch (Opcode) {
+  default: return 0;
+  case Instruction::ICmp:
+  case Instruction::FCmp: assert(0 && "Invalid for compares");
+  case Instruction::Call:
+    if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
+      if (canConstantFoldCallTo(F))
+        return ConstantFoldCall(F, Ops, NumOps - 1);
+    return 0;
+  case Instruction::PtrToInt:
+    // If the input is an inttoptr, eliminate the pair. This requires knowing
+    // the width of a pointer, so it can't be done in ConstantExpr::getCast.
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+      if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+        Constant *Input = CE->getOperand(0);
+        unsigned InWidth = Input->getType()->getScalarSizeInBits();
+        if (TD->getPointerSizeInBits() < InWidth) {
+          Constant *Mask =
+            ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
+                                                  TD->getPointerSizeInBits()));
+          Input = ConstantExpr::getAnd(Input, Mask);
+        }
+        // Do a zext or trunc to get to the dest size.
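+        // Illustrative example, assuming 32-bit pointers: folding
+        //   ptrtoint (inttoptr (i64 X to i8*)) to i64
+        // first masks X down to its low 32 bits, because the round trip
+        // through the narrower pointer type discards the high bits.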
+ return ConstantExpr::getIntegerCast(Input, DestTy, false); + } + } + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::IntToPtr: + // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if + // the int size is >= the ptr size. This requires knowing the width of a + // pointer, so it can't be done in ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) + if (TD && + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::BitCast: + if (TD) + return FoldBitCast(Ops[0], DestTy, *TD); + return ConstantExpr::getBitCast(Ops[0], DestTy); + case Instruction::Select: + return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ExtractElement: + return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::InsertElement: + return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::ShuffleVector: + return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); + case Instruction::GetElementPtr: + if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD)) + return C; + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + return C; + + return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); + } +} + +/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// instruction (icmp/fcmp) with the specified operands. If it fails, it +/// returns a constant expression of the specified operands. +/// +Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, + Constant *Ops0, Constant *Ops1, + const TargetData *TD) { + // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y + // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y + // + // ConstantExpr::getCompare cannot do this, because it doesn't have TD + // around to know if bit truncation is happening. + if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { + if (TD && Ops1->isNullValue()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. 
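+      // Illustrative example, assuming 32-bit pointers: for
+      //   icmp eq (ptrtoint (i8* @G to i64)), 0
+      // the fold below is skipped, because the i64 result hides a zero
+      // extension of the 32-bit pointer value that is not modeled here.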
+ if (CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + } + + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { + if (TD && CE0->getOpcode() == CE1->getOpcode()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), + IntPtrTy, false); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if ((CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) + return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), + CE1->getOperand(0), TD); + } + } + + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) + // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) + if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && + CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { + Constant *LHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + Constant *RHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + unsigned OpC = + Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + Constant *Ops[] = { LHS, RHS }; + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + } + } + + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); +} + + +/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a +/// getelementptr constantexpr, return the constant value being addressed by the +/// constant expression, or null if something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, + ConstantExpr *CE) { + if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + return 0; // Do not allow stepping over the value! + + // Loop over all of the operands, tracking down which value we are + // addressing... 
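+  // Illustrative example: folding a load through
+  //   getelementptr ({ i32, [2 x i32] }* @G, i32 0, i32 1, i32 0)
+  // selects operand 1 of @G's initializer (the array) and then element 0 of
+  // that array, returning the i32 constant stored there.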
+  gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
+  for (++I; I != E; ++I)
+    if (const StructType *STy = dyn_cast<StructType>(*I)) {
+      ConstantInt *CU = cast<ConstantInt>(I.getOperand());
+      assert(CU->getZExtValue() < STy->getNumElements() &&
+             "Struct index out of range!");
+      unsigned El = (unsigned)CU->getZExtValue();
+      if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+        C = CS->getOperand(El);
+      } else if (isa<ConstantAggregateZero>(C)) {
+        C = Constant::getNullValue(STy->getElementType(El));
+      } else if (isa<UndefValue>(C)) {
+        C = UndefValue::get(STy->getElementType(El));
+      } else {
+        return 0;
+      }
+    } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
+      if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
+        if (CI->getZExtValue() >= ATy->getNumElements())
+          return 0;
+        if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+          C = CA->getOperand(CI->getZExtValue());
+        else if (isa<ConstantAggregateZero>(C))
+          C = Constant::getNullValue(ATy->getElementType());
+        else if (isa<UndefValue>(C))
+          C = UndefValue::get(ATy->getElementType());
+        else
+          return 0;
+      } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) {
+        if (CI->getZExtValue() >= VTy->getNumElements())
+          return 0;
+        if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
+          C = CP->getOperand(CI->getZExtValue());
+        else if (isa<ConstantAggregateZero>(C))
+          C = Constant::getNullValue(VTy->getElementType());
+        else if (isa<UndefValue>(C))
+          C = UndefValue::get(VTy->getElementType());
+        else
+          return 0;
+      } else {
+        return 0;
+      }
+    } else {
+      return 0;
+    }
+  return C;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Constant Folding for Calls
+//
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool
+llvm::canConstantFoldCallTo(const Function *F) {
+  switch (F->getIntrinsicID()) {
+  case Intrinsic::sqrt:
+  case Intrinsic::powi:
+  case Intrinsic::bswap:
+  case Intrinsic::ctpop:
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+  case Intrinsic::uadd_with_overflow:
+  case Intrinsic::usub_with_overflow:
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+  case Intrinsic::convert_from_fp16:
+  case Intrinsic::convert_to_fp16:
+    return true;
+  default:
+    return false;
+  case 0: break;
+  }
+
+  if (!F->hasName()) return false;
+  StringRef Name = F->getName();
+
+  // In these cases, the check of the length is required. We don't want to
+  // return true for a name like "cos\0blah", which strcmp would consider
+  // equal to "cos" but which has length 8.
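+  // StringRef's operator== compares length as well as contents, so the
+  // comparisons below already reject such embedded-nul names.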
+ switch (Name[0]) { + default: return false; + case 'a': + return Name == "acos" || Name == "asin" || + Name == "atan" || Name == "atan2"; + case 'c': + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + case 'e': + return Name == "exp"; + case 'f': + return Name == "fabs" || Name == "fmod" || Name == "floor"; + case 'l': + return Name == "log" || Name == "log10"; + case 'p': + return Name == "pow"; + case 's': + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; + case 't': + return Name == "tan" || Name == "tanh"; + } +} + +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, + const Type *Ty) { + errno = 0; + V = NativeFP(V); + if (errno != 0) { + errno = 0; + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), + double V, double W, const Type *Ty) { + errno = 0; + V = NativeFP(V, W); + if (errno != 0) { + errno = 0; + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// with the specified arguments, returning null if unsuccessful. +Constant * +llvm::ConstantFoldCall(Function *F, + Constant *const *Operands, unsigned NumOperands) { + if (!F->hasName()) return 0; + StringRef Name = F->getName(); + + const Type *Ty = F->getReturnType(); + if (NumOperands == 1) { + if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (Name == "llvm.convert.to.fp16") { + APFloat Val(Op->getValueAPF()); + + bool lost = false; + Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + + return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + } + + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + /// Currently APFloat versions of these functions do not exist, so we use + /// the host native double versions. Float versions are not called + /// directly but for all these it is true (float)(f((double)arg)) == + /// f(arg). Long double not supported yet. + double V = Ty->isFloatTy() ? 
(double)Op->getValueAPF().convertToFloat() : + Op->getValueAPF().convertToDouble(); + switch (Name[0]) { + case 'a': + if (Name == "acos") + return ConstantFoldFP(acos, V, Ty); + else if (Name == "asin") + return ConstantFoldFP(asin, V, Ty); + else if (Name == "atan") + return ConstantFoldFP(atan, V, Ty); + break; + case 'c': + if (Name == "ceil") + return ConstantFoldFP(ceil, V, Ty); + else if (Name == "cos") + return ConstantFoldFP(cos, V, Ty); + else if (Name == "cosh") + return ConstantFoldFP(cosh, V, Ty); + else if (Name == "cosf") + return ConstantFoldFP(cos, V, Ty); + break; + case 'e': + if (Name == "exp") + return ConstantFoldFP(exp, V, Ty); + break; + case 'f': + if (Name == "fabs") + return ConstantFoldFP(fabs, V, Ty); + else if (Name == "floor") + return ConstantFoldFP(floor, V, Ty); + break; + case 'l': + if (Name == "log" && V > 0) + return ConstantFoldFP(log, V, Ty); + else if (Name == "log10" && V > 0) + return ConstantFoldFP(log10, V, Ty); + else if (Name == "llvm.sqrt.f32" || + Name == "llvm.sqrt.f64") { + if (V >= -0.0) + return ConstantFoldFP(sqrt, V, Ty); + else // Undefined + return Constant::getNullValue(Ty); + } + break; + case 's': + if (Name == "sin") + return ConstantFoldFP(sin, V, Ty); + else if (Name == "sinh") + return ConstantFoldFP(sinh, V, Ty); + else if (Name == "sqrt" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sqrtf" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sinf") + return ConstantFoldFP(sin, V, Ty); + break; + case 't': + if (Name == "tan") + return ConstantFoldFP(tan, V, Ty); + else if (Name == "tanh") + return ConstantFoldFP(tanh, V, Ty); + break; + default: + break; + } + return 0; + } + + + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); + else if (Name.startswith("llvm.ctpop")) + return ConstantInt::get(Ty, Op->getValue().countPopulation()); + else if (Name.startswith("llvm.cttz")) + return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); + else if (Name.startswith("llvm.ctlz")) + return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + else if (Name == "llvm.convert.from.fp16") { + APFloat Val(Op->getValue()); + + bool lost = false; + APFloat::opStatus status = + Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. + status = status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(F->getContext(), Val); + } + return 0; + } + + if (isa<UndefValue>(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return Operands[0]; + return 0; + } + + return 0; + } + + if (NumOperands == 2) { + if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + double Op1V = Ty->isFloatTy() ? + (double)Op1->getValueAPF().convertToFloat() : + Op1->getValueAPF().convertToDouble(); + if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V = Ty->isFloatTy() ? 
+ (double)Op2->getValueAPF().convertToFloat(): + Op2->getValueAPF().convertToDouble(); + + if (Name == "pow") + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if (Name == "fmod") + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); + if (Name == "atan2") + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (Name == "llvm.powi.f32") + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (Name == "llvm.powi.f64") + return ConstantFP::get(F->getContext(), + APFloat((double)std::pow((double)Op1V, + (int)Op2C->getZExtValue()))); + } + return 0; + } + + + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::usub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::sadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op1->getType(), 0), Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::ssub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op2->getType(), 0), Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + } + } + + return 0; + } + return 0; + } + return 0; +} + diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp new file mode 100644 index 0000000..0567750 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp @@ -0,0 +1,105 @@ +//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions, and associated debug
+// info:
+//
+//   - source/line/col information
+//   - original variable name
+//   - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+               cl::desc("Print fullpath when printing debug info"),
+               cl::Hidden);
+
+namespace {
+  class PrintDbgInfo : public FunctionPass {
+    raw_ostream &Out;
+    void printVariableDeclaration(const Value *V);
+  public:
+    static char ID; // Pass identification
+    PrintDbgInfo() : FunctionPass(ID), Out(errs()) {}
+
+    virtual bool runOnFunction(Function &F);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+  char PrintDbgInfo::ID = 0;
+  INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+                  "Print debug info in human readable form", false, false);
+}
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+  std::string DisplayName, File, Directory, Type;
+  unsigned LineNo;
+
+  if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+    return;
+
+  Out << "; ";
+  WriteAsOperand(Out, V, false, 0);
+  Out << " is variable " << DisplayName
+      << " of type " << Type << " declared at ";
+
+  if (PrintDirectory)
+    Out << Directory << "/";
+
+  Out << File << ":" << LineNo << "\n";
+}
+
+bool PrintDbgInfo::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  Out << "function " << F.getName() << "\n\n";
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *BB = I;
+
+    if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+      // Skip dead blocks.
+      continue;
+
+    Out << BB->getName() << ":\n";
+
+    for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+         i != e; ++i) {
+
+      printVariableDeclaration(i);
+
+      if (const User *U = dyn_cast<User>(i)) {
+        // Use a distinct index name so the instruction iterator 'i' above is
+        // not shadowed.
+        for (unsigned Op = 0, NumOps = U->getNumOperands(); Op != NumOps; ++Op)
+          printVariableDeclaration(U->getOperand(Op));
+      }
+    }
+  }
+  return false;
+}
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace llvm::dwarf; + +//===----------------------------------------------------------------------===// +// DIDescriptor +//===----------------------------------------------------------------------===// + +DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { +} + +StringRef +DIDescriptor::getStringField(unsigned Elt) const { + if (DbgNode == 0) + return StringRef(); + + if (Elt < DbgNode->getNumOperands()) + if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt))) + return MDS->getString(); + + return StringRef(); +} + +uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt))) + return CI->getZExtValue(); + + return 0; +} + +DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { + if (DbgNode == 0) + return DIDescriptor(); + + if (Elt < DbgNode->getNumOperands()) + return + DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt))); + return DIDescriptor(); +} + +GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt)); + return 0; +} + +Constant *DIDescriptor::getConstantField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt)); + return 0; +} + +Function *DIDescriptor::getFunctionField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt)); + return 0; +} + +unsigned DIVariable::getNumAddrElements() const { + return DbgNode->getNumOperands()-6; +} + + +//===----------------------------------------------------------------------===// +// Predicates +//===----------------------------------------------------------------------===// + +/// isBasicType - Return true if the specified tag is legal for +/// DIBasicType. +bool DIDescriptor::isBasicType() const { + return DbgNode && getTag() == dwarf::DW_TAG_base_type; +} + +/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. 
+bool DIDescriptor::isDerivedType() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_typedef:
+  case dwarf::DW_TAG_pointer_type:
+  case dwarf::DW_TAG_reference_type:
+  case dwarf::DW_TAG_const_type:
+  case dwarf::DW_TAG_volatile_type:
+  case dwarf::DW_TAG_restrict_type:
+  case dwarf::DW_TAG_member:
+  case dwarf::DW_TAG_inheritance:
+  case dwarf::DW_TAG_friend:
+    return true;
+  default:
+    // CompositeTypes are currently modelled as DerivedTypes.
+    return isCompositeType();
+  }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_array_type:
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_enumeration_type:
+  case dwarf::DW_TAG_vector_type:
+  case dwarf::DW_TAG_subroutine_type:
+  case dwarf::DW_TAG_class_type:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_auto_variable:
+  case dwarf::DW_TAG_arg_variable:
+  case dwarf::DW_TAG_return_variable:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+  return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+  return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+                     getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+  return isGlobalVariable();
+}
+
+/// isScope - Return true if the specified tag is one of the scope-related
+/// tags.
+bool DIDescriptor::isScope() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_compile_unit:
+  case dwarf::DW_TAG_lexical_block:
+  case dwarf::DW_TAG_subprogram:
+  case dwarf::DW_TAG_namespace:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+  if (!N) return;
+  if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+    DbgNode = 0;
+  }
+}
+
+unsigned DIArray::getNumElements() const {
+  if (!DbgNode)
+    return 0;
+  return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to
+  // manufacture legitimate situations where they want to replaceAllUsesWith()
+  // on something which, due to uniquing, has merged with the source. We
+  // shield clients from this detail by allowing a value to be replaced with
+  // replaceAllUsesWith() itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+  if (!DbgNode)
+    return false;
+  StringRef N = getFilename();
+  if (N.empty())
+    return false;
+  // It is possible for the directory and producer strings to be empty.
+  return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+  return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+  return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+
+  DICompositeType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (getDisplayName().empty())
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  if (!getGlobal() && !getConstant())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  if (!getCompileUnit().Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getName().empty())
+    return false;
+  if (!getCompileUnit().Verify())
+    return false;
+  return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+  unsigned Tag = getTag();
+  if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
+      Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
+      Tag == dwarf::DW_TAG_restrict_type) {
+    DIType BaseType = getTypeDerivedFrom();
+    // If this type is not derived from any type then take conservative
+    // approach.
+    if (!BaseType.isValid())
+      return getSizeInBits();
+    if (BaseType.isDerivedType())
+      return DIDerivedType(BaseType).getOriginalTypeSize();
+    else
+      return BaseType.getSizeInBits();
+  }
+
+  return getSizeInBits();
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function argument.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+  assert(CurFn && "Invalid function");
+  if (!getContext().isSubprogram())
+    return false;
+  // This variable is not an inlined function argument if its scope
+  // does not describe the current function.
+  return !(DISubprogram(getContext()).describes(CurFn));
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
+bool DISubprogram::describes(const Function *F) { + assert(F && "Invalid function"); + if (F == getFunction()) + return true; + StringRef Name = getLinkageName(); + if (Name.empty()) + Name = getName(); + if (F->getName() == Name) + return true; + return false; +} + +unsigned DISubprogram::isOptimized() const { + assert (DbgNode && "Invalid subprogram descriptor!"); + if (DbgNode->getNumOperands() == 16) + return getUnsignedField(15); + return 0; +} + +StringRef DIScope::getFilename() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getFilename(); + if (isSubprogram()) + return DISubprogram(DbgNode).getFilename(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getFilename(); + if (isNameSpace()) + return DINameSpace(DbgNode).getFilename(); + if (isType()) + return DIType(DbgNode).getFilename(); + if (isFile()) + return DIFile(DbgNode).getFilename(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +StringRef DIScope::getDirectory() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getDirectory(); + if (isSubprogram()) + return DISubprogram(DbgNode).getDirectory(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getDirectory(); + if (isNameSpace()) + return DINameSpace(DbgNode).getDirectory(); + if (isType()) + return DIType(DbgNode).getDirectory(); + if (isFile()) + return DIFile(DbgNode).getDirectory(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +//===----------------------------------------------------------------------===// +// DIDescriptor: dump routines for all descriptors. +//===----------------------------------------------------------------------===// + + +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + OS << "[" << dwarf::TagString(getTag()) << "] "; + OS.write_hex((intptr_t) &*DbgNode) << ']'; +} + +/// print - Print compile unit. +void DICompileUnit::print(raw_ostream &OS) const { + if (getLanguage()) + OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; + + OS << " [" << getDirectory() << "/" << getFilename() << "]"; +} + +/// print - Print type. +void DIType::print(raw_ostream &OS) const { + if (!DbgNode) return; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" + << "line " << getLineNumber() << ", " + << getSizeInBits() << " bits, " + << getAlignInBits() << " bit alignment, " + << getOffsetInBits() << " bit offset" + << "] "; + + if (isPrivate()) + OS << " [private] "; + else if (isProtected()) + OS << " [protected] "; + + if (isForwardDecl()) + OS << " [fwd] "; + + if (isBasicType()) + DIBasicType(DbgNode).print(OS); + else if (isDerivedType()) + DIDerivedType(DbgNode).print(OS); + else if (isCompositeType()) + DICompositeType(DbgNode).print(OS); + else { + OS << "Invalid DIType\n"; + return; + } + + OS << "\n"; +} + +/// print - Print basic type. +void DIBasicType::print(raw_ostream &OS) const { + OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; +} + +/// print - Print derived type. +void DIDerivedType::print(raw_ostream &OS) const { + OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); +} + +/// print - Print composite type. 
+void DICompositeType::print(raw_ostream &OS) const {
+  DIArray A = getTypeArray();
+  OS << " [" << A.getNumElements() << " elements]";
+}
+
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
+  StringRef Res = getName();
+  if (!Res.empty())
+    OS << " [" << Res << "] ";
+
+  unsigned Tag = getTag();
+  OS << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO : Print context
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
+
+  if (isLocalToUnit())
+    OS << " [local] ";
+
+  if (isDefinition())
+    OS << " [def] ";
+
+  OS << "\n";
+}
+
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+  OS << " [";
+  StringRef Res = getName();
+  if (!Res.empty())
+    OS << " [" << Res << "] ";
+
+  unsigned Tag = getTag();
+  OS << " [" << dwarf::TagString(Tag) << "] ";
+
+  // TODO : Print context
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
+
+  if (isLocalToUnit())
+    OS << " [local] ";
+
+  if (isDefinition())
+    OS << " [def] ";
+
+  // (Recursing into DIGlobalVariable::print here would never terminate.)
+  OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+  StringRef Res = getName();
+  if (!Res.empty())
+    OS << " [" << Res << "] ";
+
+  getCompileUnit().print(OS);
+  OS << " [" << getLineNumber() << "] ";
+  getType().print(OS);
+  OS << "\n";
+
+  // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Basic Helpers
+//===----------------------------------------------------------------------===//
+
+DIFactory::DIFactory(Module &m)
+  : M(m), VMContext(M.getContext()), DeclareFn(0), ValueFn(0) {}
+
+Constant *DIFactory::GetTagConstant(unsigned TAG) {
+  assert((TAG & LLVMDebugVersionMask) == 0 &&
+         "Tag too large for debug encoding!");
+  return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion);
+}
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Primary Constructors
+//===----------------------------------------------------------------------===//
+
+/// GetOrCreateArray - Create a descriptor for an array of descriptors.
+/// This implicitly uniques the arrays created.
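+///
+/// Typical use (illustrative; the descriptor values are placeholders):
+///   DIDescriptor Members[] = { IntTy, FloatTy };
+///   DIArray Elts = DIF.GetOrCreateArray(Members, 2);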
+DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
+  SmallVector<Value*, 16> Elts;
+
+  if (NumTys == 0)
+    Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  else
+    for (unsigned i = 0; i != NumTys; ++i)
+      Elts.push_back(Tys[i]);
+
+  return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// GetOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_subrange_type),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+  };
+
+  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+}
+
+
+
+/// CreateCompileUnit - Create a new descriptor for the specified compile
+/// unit. Note that this does not unique compile units within the module.
+DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
+                                           StringRef Filename,
+                                           StringRef Directory,
+                                           StringRef Producer,
+                                           bool isMain,
+                                           bool isOptimized,
+                                           StringRef Flags,
+                                           unsigned RunTimeVer) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_compile_unit),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), LangID),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    MDString::get(VMContext, Producer),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isMain),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    MDString::get(VMContext, Flags),
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+  };
+
+  return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10));
+}
+
+/// CreateFile - Create a new descriptor for the specified file.
+DIFile DIFactory::CreateFile(StringRef Filename,
+                             StringRef Directory,
+                             DICompileUnit CU) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_file_type),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    CU
+  };
+
+  return DIFile(MDNode::get(VMContext, &Elts[0], 4));
+}
+
+/// CreateEnumerator - Create a single enumerator value.
+DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_enumerator),
+    MDString::get(VMContext, Name),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+  };
+  return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3));
+}
+
+
+/// CreateBasicType - Create a basic type like int, float, etc.
+DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
+                                       StringRef Name,
+                                       DIFile F,
+                                       unsigned LineNumber,
+                                       uint64_t SizeInBits,
+                                       uint64_t AlignInBits,
+                                       uint64_t OffsetInBits, unsigned Flags,
+                                       unsigned Encoding) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_base_type),
+    Context,
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+  };
+  return DIBasicType(MDNode::get(VMContext, &Elts[0], 10));
+}
+
+
+/// CreateBasicTypeEx - Create a basic type like int, float, etc.
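+///
+/// Note: the Ex variants that follow take SizeInBits, AlignInBits and
+/// OffsetInBits as arbitrary Constants rather than fixed integers, so they
+/// can carry values that are not plain integer literals.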
+DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, + StringRef Name, + DIFile F, + unsigned LineNumber, + Constant *SizeInBits, + Constant *AlignInBits, + Constant *OffsetInBits, unsigned Flags, + unsigned Encoding) { + Value *Elts[] = { + GetTagConstant(dwarf::DW_TAG_base_type), + Context, + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + SizeInBits, + AlignInBits, + OffsetInBits, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) + }; + return DIBasicType(MDNode::get(VMContext, &Elts[0], 10)); +} + +/// CreateArtificialType - Create a new DIType with "artificial" flag set. +DIType DIFactory::CreateArtificialType(DIType Ty) { + if (Ty.isArtificial()) + return Ty; + + SmallVector<Value *, 9> Elts; + MDNode *N = Ty; + assert (N && "Unexpected input DIType!"); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (Value *V = N->getOperand(i)) + Elts.push_back(V); + else + Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); + } + + unsigned CurFlags = Ty.getFlags(); + CurFlags = CurFlags | DIType::FlagArtificial; + + // Flags are stored at this slot. + Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); + + return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); +} + +/// CreateDerivedType - Create a derived type like const qualified type, +/// pointer, typedef, etc. +DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, + DIDescriptor Context, + StringRef Name, + DIFile F, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, + DIType DerivedFrom) { + Value *Elts[] = { + GetTagConstant(Tag), + Context, + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + }; + return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); +} + + +/// CreateDerivedType - Create a derived type like const qualified type, +/// pointer, typedef, etc. +DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, + DIDescriptor Context, + StringRef Name, + DIFile F, + unsigned LineNumber, + Constant *SizeInBits, + Constant *AlignInBits, + Constant *OffsetInBits, + unsigned Flags, + DIType DerivedFrom) { + Value *Elts[] = { + GetTagConstant(Tag), + Context, + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + SizeInBits, + AlignInBits, + OffsetInBits, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + }; + return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); +} + + +/// CreateCompositeType - Create a composite type like array, struct, etc. 
+DICompositeType DIFactory::CreateCompositeType(unsigned Tag, + DIDescriptor Context, + StringRef Name, + DIFile F, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, + DIType DerivedFrom, + DIArray Elements, + unsigned RuntimeLang, + MDNode *ContainingType) { + + Value *Elts[] = { + GetTagConstant(Tag), + Context, + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), + ContainingType + }; + + MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); + // Create a named metadata so that we do not lose this enum info. + if (Tag == dwarf::DW_TAG_enumeration_type) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + } + return DICompositeType(Node); +} + + +/// CreateTemporaryType - Create a temporary forward-declared type. +DIType DIFactory::CreateTemporaryType() { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. + Value *Elts[] = { + GetTagConstant(DW_TAG_base_type) + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + return DIType(Node); +} + + +/// CreateCompositeType - Create a composite type like array, struct, etc. +DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, + DIDescriptor Context, + StringRef Name, + DIFile F, + unsigned LineNumber, + Constant *SizeInBits, + Constant *AlignInBits, + Constant *OffsetInBits, + unsigned Flags, + DIType DerivedFrom, + DIArray Elements, + unsigned RuntimeLang, + MDNode *ContainingType) { + Value *Elts[] = { + GetTagConstant(Tag), + Context, + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + SizeInBits, + AlignInBits, + OffsetInBits, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), + ContainingType + }; + MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); + // Create a named metadata so that we do not lose this enum info. + if (Tag == dwarf::DW_TAG_enumeration_type) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + } + return DICompositeType(Node); +} + + +/// CreateSubprogram - Create a new descriptor for the specified subprogram. +/// See comments in DISubprogram for descriptions of these fields. This +/// method does not unique the generated descriptors. 
+DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
+                                         StringRef Name,
+                                         StringRef DisplayName,
+                                         StringRef LinkageName,
+                                         DIFile F,
+                                         unsigned LineNo, DIType Ty,
+                                         bool isLocalToUnit,
+                                         bool isDefinition,
+                                         unsigned VK, unsigned VIndex,
+                                         DIType ContainingType,
+                                         bool isArtificial,
+                                         bool isOptimized,
+                                         Function *Fn) {
+
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, DisplayName),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+    ContainingType,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 17);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// CreateSubprogramDefinition - Create a new subprogram descriptor for the
+/// given declaration.
+DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
+  if (SPDeclaration.isDefinition())
+    return DISubprogram(SPDeclaration);
+
+  MDNode *DeclNode = SPDeclaration;
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    DeclNode->getOperand(2), // Context
+    DeclNode->getOperand(3), // Name
+    DeclNode->getOperand(4), // DisplayName
+    DeclNode->getOperand(5), // LinkageName
+    DeclNode->getOperand(6), // CompileUnit
+    DeclNode->getOperand(7), // LineNo
+    DeclNode->getOperand(8), // Type
+    DeclNode->getOperand(9), // isLocalToUnit
+    ConstantInt::get(Type::getInt1Ty(VMContext), true),
+    DeclNode->getOperand(11), // Virtuality
+    DeclNode->getOperand(12), // VIndex
+    DeclNode->getOperand(13), // Containing Type
+    DeclNode->getOperand(14), // isArtificial
+    DeclNode->getOperand(15), // isOptimized
+    SPDeclaration.getFunction()
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 16);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
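+
+// A sketch of how a front end might describe a function with the constructor
+// above, continuing the hypothetical example started earlier (`Fn` is an
+// existing Function* and `SPTy` a previously built subroutine DIType; both
+// are assumptions, not defined in this file):
+//
+//   DISubprogram SP =
+//     DF.CreateSubprogram(CU, "foo", "foo", "foo", File, /*LineNo=*/1, SPTy,
+//                         /*isLocalToUnit=*/false, /*isDefinition=*/true,
+//                         /*VK=*/0, /*VIndex=*/0, DIType(),
+//                         /*isArtificial=*/false, /*isOptimized=*/false, Fn);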
+
+/// CreateGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+                                StringRef DisplayName,
+                                StringRef LinkageName,
+                                DIFile F,
+                                unsigned LineNo, DIType Ty, bool isLocalToUnit,
+                                bool isDefinition, llvm::GlobalVariable *Val) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, DisplayName),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    Val
+  };
+
+  Value *const *Vs = &Elts[0];
+  MDNode *Node = MDNode::get(VMContext, Vs, 12);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+
+  return DIGlobalVariable(Node);
+}
+
+/// CreateGlobalVariable - Create a new descriptor for the specified constant.
+DIGlobalVariable
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+                                StringRef DisplayName,
+                                StringRef LinkageName,
+                                DIFile F,
+                                unsigned LineNo, DIType Ty, bool isLocalToUnit,
+                                bool isDefinition, llvm::Constant *Val) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, DisplayName),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    Val
+  };
+
+  Value *const *Vs = &Elts[0];
+  MDNode *Node = MDNode::get(VMContext, Vs, 12);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+
+  return DIGlobalVariable(Node);
+}
+
+/// CreateVariable - Create a new descriptor for the specified variable.
+DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
+                                     StringRef Name,
+                                     DIFile F,
+                                     unsigned LineNo,
+                                     DIType Ty, bool AlwaysPreserve) {
+  Value *Elts[] = {
+    GetTagConstant(Tag),
+    Context,
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variables.  If there is an interest in
+    // preserving variable info in such a situation then stash it in a named
+    // mdnode.
+    DISubprogram Fn(getDISubprogram(Context));
+    StringRef FName = "fn";
+    if (Fn.getFunction())
+      FName = Fn.getFunction()->getName();
+    char One = '\1';
+    if (FName.startswith(StringRef(&One, 1)))
+      FName = FName.substr(1);
+
+    SmallString<32> Out;
+    NamedMDNode *FnLocals =
+      M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
+    FnLocals->addOperand(Node);
+  }
+  return DIVariable(Node);
+}
+
+/// CreateComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
+                                            const std::string &Name,
+                                            DIFile F,
+                                            unsigned LineNo,
+                                            DIType Ty,
+                                            SmallVector<Value *, 9> &addr) {
+  SmallVector<Value *, 9> Elts;
+  Elts.push_back(GetTagConstant(Tag));
+  Elts.push_back(Context);
+  Elts.push_back(MDString::get(VMContext, Name));
+  Elts.push_back(F);
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
+  Elts.push_back(Ty);
+  Elts.insert(Elts.end(), addr.begin(), addr.end());
+
+  return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
+}
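+
+// Local variables go through CreateVariable; with AlwaysPreserve set, the
+// descriptor is also stashed in the per-function "llvm.dbg.lv.<fnname>" named
+// metadata so it survives optimization.  A sketch continuing the hypothetical
+// example above (SP and IntTy are the invented descriptors from the earlier
+// sketches):
+//
+//   DIVariable LocalX =
+//     DF.CreateVariable(dwarf::DW_TAG_auto_variable, SP, "x", File,
+//                       /*LineNo=*/2, IntTy, /*AlwaysPreserve=*/true);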
+
+/// CreateLexicalBlock - This creates a descriptor for a lexical block with
+/// the specified parent context.
+DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
+                                             DIFile F, unsigned LineNo,
+                                             unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks.
+  static unsigned int unique_id = 0;
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_lexical_block),
+    Context,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+  };
+  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6));
+}
+
+/// CreateNameSpace - This creates a new descriptor for a namespace
+/// with the specified parent context.
+DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name,
+                                       DIFile F,
+                                       unsigned LineNo) {
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_namespace),
+    Context,
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+  };
+  return DINameSpace(MDNode::get(VMContext, &Elts[0], 5));
+}
+
+/// CreateLocation - Creates a debug info location.
+DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
+                                     DIScope S, DILocation OrigLoc) {
+  Value *Elts[] = {
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
+    S,
+    OrigLoc,
+  };
+  return DILocation(MDNode::get(VMContext, &Elts[0], 4));
+}
+
+//===----------------------------------------------------------------------===//
+// DIFactory: Routines for inserting code into a function
+//===----------------------------------------------------------------------===//
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+                                      Instruction *InsertBefore) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.Verify() && "empty DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
+                    D };
+  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
+
+/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+                                      BasicBlock *InsertAtEnd) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(D.Verify() && "invalid DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
+                    D };
+
+  // If this block already has a terminator then insert this intrinsic
+  // before the terminator.
+  if (TerminatorInst *T = InsertAtEnd->getTerminator())
+    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+  else
+    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
+                                                DIVariable D,
+                                                Instruction *InsertBefore) {
+  assert(V && "no value passed to dbg.value");
+  assert(D.Verify() && "invalid DIVariable passed to dbg.value");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+                    D };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
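+
+// Once storage for the variable exists, the descriptor is bound to it with an
+// llvm.dbg.declare call.  A sketch under the same assumptions as the earlier
+// examples (`Entry` is a hypothetical entry basic block):
+//
+//   AllocaInst *AI = new AllocaInst(Type::getInt32Ty(VMContext), "x", Entry);
+//   DF.InsertDeclare(AI, LocalX, Entry);
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.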
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable D, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(D.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + D }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); +} + +//===----------------------------------------------------------------------===// +// DebugInfoFinder implementations. +//===----------------------------------------------------------------------===// + +/// processModule - Process entire module and collect debug info. +void DebugInfoFinder::processModule(Module &M) { + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; + ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + processDeclare(DDI); + + DebugLoc Loc = BI->getDebugLoc(); + if (Loc.isUnknown()) + continue; + + LLVMContext &Ctx = BI->getContext(); + DIDescriptor Scope(Loc.getScope(Ctx)); + + if (Scope.isCompileUnit()) + addCompileUnit(DICompileUnit(Scope)); + else if (Scope.isSubprogram()) + processSubprogram(DISubprogram(Scope)); + else if (Scope.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(Scope)); + + if (MDNode *IA = Loc.getInlinedAt(Ctx)) + processLocation(DILocation(IA)); + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); + if (addGlobalVariable(DIG)) { + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } + } + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + processSubprogram(DISubprogram(NMD->getOperand(i))); +} + +/// processLocation - Process DILocation. +void DebugInfoFinder::processLocation(DILocation Loc) { + if (!Loc.Verify()) return; + DIDescriptor S(Loc.getScope()); + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S)); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S)); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S)); + processLocation(Loc.getOrigLocation()); +} + +/// processType - Process DIType. +void DebugInfoFinder::processType(DIType DT) { + if (!addType(DT)) + return; + + addCompileUnit(DT.getCompileUnit()); + if (DT.isCompositeType()) { + DICompositeType DCT(DT); + processType(DCT.getTypeDerivedFrom()); + DIArray DA = DCT.getTypeArray(); + for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { + DIDescriptor D = DA.getElement(i); + if (D.isType()) + processType(DIType(D)); + else if (D.isSubprogram()) + processSubprogram(DISubprogram(D)); + } + } else if (DT.isDerivedType()) { + DIDerivedType DDT(DT); + processType(DDT.getTypeDerivedFrom()); + } +} + +/// processLexicalBlock +void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { + DIScope Context = LB.getContext(); + if (Context.isLexicalBlock()) + return processLexicalBlock(DILexicalBlock(Context)); + else + return processSubprogram(DISubprogram(Context)); +} + +/// processSubprogram - Process DISubprogram. 
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+  if (!addSubprogram(SP))
+    return;
+  addCompileUnit(SP.getCompileUnit());
+  processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+  MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+  if (!N) return;
+
+  DIDescriptor DV(N);
+  if (!DV.isVariable())
+    return;
+
+  if (!NodesSeen.insert(DV))
+    return;
+
+  addCompileUnit(DIVariable(N).getCompileUnit());
+  processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into TYs.
+bool DebugInfoFinder::addType(DIType DT) {
+  if (!DT.isValid())
+    return false;
+
+  if (!NodesSeen.insert(DT))
+    return false;
+
+  TYs.push_back(DT);
+  return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+  if (!CU.Verify())
+    return false;
+
+  if (!NodesSeen.insert(CU))
+    return false;
+
+  CUs.push_back(CU);
+  return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+  if (!DIDescriptor(DIG).isGlobalVariable())
+    return false;
+
+  if (!NodesSeen.insert(DIG))
+    return false;
+
+  GVs.push_back(DIG);
+  return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+  if (!DIDescriptor(SP).isSubprogram())
+    return false;
+
+  if (!NodesSeen.insert(SP))
+    return false;
+
+  SPs.push_back(SP);
+  return true;
+}
+
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+  const Module *M = V->getParent();
+  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+  if (!NMD)
+    return 0;
+
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+    if (!DIG.isGlobalVariable())
+      continue;
+    if (DIGlobalVariable(DIG).getGlobal() == V)
+      return DIG;
+  }
+  return 0;
+}
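+
+// DebugInfoFinder is typically driven once over a module and then queried
+// through its iterators.  A minimal sketch (illustrative only; assumes a
+// Module `M` in scope):
+//
+//   DebugInfoFinder DIF;
+//   DIF.processModule(M);
+//   for (DebugInfoFinder::iterator I = DIF.subprogram_begin(),
+//        E = DIF.subprogram_end(); I != E; ++I)
+//     DISubprogram(*I).print(dbgs());
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.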
+static const DbgDeclareInst *findDbgDeclare(const Value *V) { + V = V->stripPointerCasts(); + + if (!isa<Instruction>(V) && !isa<Argument>(V)) + return 0; + + const Function *F = NULL; + if (const Instruction *I = dyn_cast<Instruction>(V)) + F = I->getParent()->getParent(); + else if (const Argument *A = dyn_cast<Argument>(V)) + F = A->getParent(); + + for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); + BI != BE; ++BI) + if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + if (DDI->getAddress() == V) + return DDI; + + return 0; +} + +bool llvm::getLocationInfo(const Value *V, std::string &DisplayName, + std::string &Type, unsigned &LineNo, + std::string &File, std::string &Dir) { + DICompileUnit Unit; + DIType TypeD; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) { + Value *DIGV = findDbgGlobalDeclare(GV); + if (!DIGV) return false; + DIGlobalVariable Var(cast<MDNode>(DIGV)); + + StringRef D = Var.getDisplayName(); + if (!D.empty()) + DisplayName = D; + LineNo = Var.getLineNumber(); + Unit = Var.getCompileUnit(); + TypeD = Var.getType(); + } else { + const DbgDeclareInst *DDI = findDbgDeclare(V); + if (!DDI) return false; + DIVariable Var(cast<MDNode>(DDI->getVariable())); + + StringRef D = Var.getName(); + if (!D.empty()) + DisplayName = D; + LineNo = Var.getLineNumber(); + Unit = Var.getCompileUnit(); + TypeD = Var.getType(); + } + + StringRef T = TypeD.getName(); + if (!T.empty()) + Type = T; + StringRef F = Unit.getFilename(); + if (!F.empty()) + File = F; + StringRef D = Unit.getDirectory(); + if (!D.empty()) + Dir = D; + return true; +} + +/// getDISubprogram - Find subprogram that is enclosing this scope. +DISubprogram llvm::getDISubprogram(const MDNode *Scope) { + DIDescriptor D(Scope); + if (D.isSubprogram()) + return DISubprogram(Scope); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(Scope).getContext()); + + return DISubprogram(); +} + +/// getDICompositeType - Find underlying composite type. +DICompositeType llvm::getDICompositeType(DIType T) { + if (T.isCompositeType()) + return DICompositeType(T); + + if (T.isDerivedType()) + return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); + + return DICompositeType(); +} diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp new file mode 100644 index 0000000..9f34094 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp @@ -0,0 +1,218 @@ +//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit +// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the +// program, with a graph of the dominance/postdominance tree of that +// function. +// +// There are also passes available to directly call dotty ('-view-dom' or +// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the +// names of the bbs are printed, but the content is hidden. 
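+//
+// For example, a typical invocation sketch (file names are illustrative):
+//
+//   opt -dot-dom input.bc -disable-output
+//
+// writes one dom.<fnname>.dot file per function, which can then be rendered
+// with Graphviz, e.g. 'dot -Tpng dom.main.dot -o dom.main.png'.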
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DomPrinter.h" + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/PostDominators.h" + +using namespace llvm; + +namespace llvm { +template<> +struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) { + + BasicBlock *BB = Node->getBlock(); + + if (!BB) + return "Post dominance root node"; + + + if (isSimple()) + return DOTGraphTraits<const Function*> + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits<const Function*> + ::getCompleteNodeLabel(BB, BB->getParent()); + } +}; + +template<> +struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<DomTreeNode*>(isSimple) {} + + static std::string getGraphName(DominatorTree *DT) { + return "Dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode()); + } +}; + +template<> +struct DOTGraphTraits<PostDominatorTree*> + : public DOTGraphTraits<DomTreeNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<DomTreeNode*>(isSimple) {} + + static std::string getGraphName(PostDominatorTree *DT) { + return "Post dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode()); + } +}; +} + +namespace { +struct DomViewer + : public DOTGraphTraitsViewer<DominatorTree, false> { + static char ID; + DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){} +}; + +struct DomOnlyViewer + : public DOTGraphTraitsViewer<DominatorTree, true> { + static char ID; + DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){} +}; + +struct PostDomViewer + : public DOTGraphTraitsViewer<PostDominatorTree, false> { + static char ID; + PostDomViewer() : + DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){} +}; + +struct PostDomOnlyViewer + : public DOTGraphTraitsViewer<PostDominatorTree, true> { + static char ID; + PostDomOnlyViewer() : + DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){} +}; +} // end anonymous namespace + +char DomViewer::ID = 0; +INITIALIZE_PASS(DomViewer, "view-dom", + "View dominance tree of function", false, false); + +char DomOnlyViewer::ID = 0; +INITIALIZE_PASS(DomOnlyViewer, "view-dom-only", + "View dominance tree of function (with no function bodies)", + false, false); + +char PostDomViewer::ID = 0; +INITIALIZE_PASS(PostDomViewer, "view-postdom", + "View postdominance tree of function", false, false); + +char PostDomOnlyViewer::ID = 0; +INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", + "View postdominance tree of function " + "(with no function bodies)", + false, false); + +namespace { +struct DomPrinter + : public DOTGraphTraitsPrinter<DominatorTree, false> { + static char ID; + DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {} +}; + +struct DomOnlyPrinter + : public DOTGraphTraitsPrinter<DominatorTree, true> { + static char ID; + DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {} +}; + +struct PostDomPrinter + : public DOTGraphTraitsPrinter<PostDominatorTree, false> { + static char 
ID; + PostDomPrinter() : + DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {} +}; + +struct PostDomOnlyPrinter + : public DOTGraphTraitsPrinter<PostDominatorTree, true> { + static char ID; + PostDomOnlyPrinter() : + DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {} +}; +} // end anonymous namespace + + + +char DomPrinter::ID = 0; +INITIALIZE_PASS(DomPrinter, "dot-dom", + "Print dominance tree of function to 'dot' file", + false, false); + +char DomOnlyPrinter::ID = 0; +INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only", + "Print dominance tree of function to 'dot' file " + "(with no function bodies)", + false, false); + +char PostDomPrinter::ID = 0; +INITIALIZE_PASS(PostDomPrinter, "dot-postdom", + "Print postdominance tree of function to 'dot' file", + false, false); + +char PostDomOnlyPrinter::ID = 0; +INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", + "Print postdominance tree of function to 'dot' file " + "(with no function bodies)", + false, false); + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +FunctionPass *llvm::createDomPrinterPass() { + return new DomPrinter(); +} + +FunctionPass *llvm::createDomOnlyPrinterPass() { + return new DomOnlyPrinter(); +} + +FunctionPass *llvm::createDomViewerPass() { + return new DomViewer(); +} + +FunctionPass *llvm::createDomOnlyViewerPass() { + return new DomOnlyViewer(); +} + +FunctionPass *llvm::createPostDomPrinterPass() { + return new PostDomPrinter(); +} + +FunctionPass *llvm::createPostDomOnlyPrinterPass() { + return new PostDomOnlyPrinter(); +} + +FunctionPass *llvm::createPostDomViewerPass() { + return new PostDomViewer(); +} + +FunctionPass *llvm::createPostDomOnlyViewerPass() { + return new PostDomOnlyViewer(); +} diff --git a/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt b/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt new file mode 100644 index 0000000..007ad22 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMipa + CallGraph.cpp + CallGraphSCCPass.cpp + FindUsedTypes.cpp + GlobalsModRef.cpp + ) diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp new file mode 100644 index 0000000..b363528 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -0,0 +1,324 @@ +//===- CallGraph.cpp - Build a Module's call graph ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CallGraph class and provides the BasicCallGraph +// default implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +//===----------------------------------------------------------------------===// +// BasicCallGraph class definition +// +class BasicCallGraph : public ModulePass, public CallGraph { + // Root is root of the call graph, or the external node if a 'main' function + // couldn't be found. 
+ // + CallGraphNode *Root; + + // ExternalCallingNode - This node has edges to all external functions and + // those internal functions that have their address taken. + CallGraphNode *ExternalCallingNode; + + // CallsExternalNode - This node has edges to it from all functions making + // indirect calls or calling an external function. + CallGraphNode *CallsExternalNode; + +public: + static char ID; // Class identification, replacement for typeinfo + BasicCallGraph() : ModulePass(ID), Root(0), + ExternalCallingNode(0), CallsExternalNode(0) {} + + // runOnModule - Compute the call graph for the specified module. + virtual bool runOnModule(Module &M) { + CallGraph::initialize(M); + + ExternalCallingNode = getOrInsertFunction(0); + CallsExternalNode = new CallGraphNode(0); + Root = 0; + + // Add every function to the call graph. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); + + // If we didn't find a main function, use the external call graph node + if (Root == 0) Root = ExternalCallingNode; + + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual void print(raw_ostream &OS, const Module *) const { + OS << "CallGraph Root is: "; + if (Function *F = getRoot()->getFunction()) + OS << F->getName() << "\n"; + else { + OS << "<<null function: 0x" << getRoot() << ">>\n"; + } + + CallGraph::print(OS, 0); + } + + virtual void releaseMemory() { + destroy(); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it should + /// override this to adjust the this pointer as needed for the specified pass + /// info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &CallGraph::ID) + return (CallGraph*)this; + return this; + } + + CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } + CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } + + // getRoot - Return the root of the call graph, which is either main, or if + // main cannot be found, the external node. + // + CallGraphNode *getRoot() { return Root; } + const CallGraphNode *getRoot() const { return Root; } + +private: + //===--------------------------------------------------------------------- + // Implementation of CallGraph construction + // + + // addToCallGraph - Add a function to the call graph, and link the node to all + // of the functions that it calls. + // + void addToCallGraph(Function *F) { + CallGraphNode *Node = getOrInsertFunction(F); + + // If this function has external linkage, anything could call it. + if (!F->hasLocalLinkage()) { + ExternalCallingNode->addCalledFunction(CallSite(), Node); + + // Found the entry point? + if (F->getName() == "main") { + if (Root) // Found multiple external mains? Don't pick one. + Root = ExternalCallingNode; + else + Root = Node; // Found a main, keep track of it! + } + } + + // Loop over all of the users of the function, looking for non-call uses. + for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ + User *U = *I; + if ((!isa<CallInst>(U) && !isa<InvokeInst>(U)) + || !CallSite(cast<Instruction>(U)).isCallee(I)) { + // Not a call, or being used as a parameter rather than as the callee. + ExternalCallingNode->addCalledFunction(CallSite(), Node); + break; + } + } + + // If this function is not defined in this translation unit, it could call + // anything. 
+    if (F->isDeclaration() && !F->isIntrinsic())
+      Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+    // Look for calls by this function.
+    for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+      for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+           II != IE; ++II) {
+        CallSite CS(cast<Value>(II));
+        if (CS && !isa<DbgInfoIntrinsic>(II)) {
+          const Function *Callee = CS.getCalledFunction();
+          if (Callee)
+            Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+          else
+            Node->addCalledFunction(CS, CallsExternalNode);
+        }
+      }
+  }
+
+  //
+  // destroy - Release memory for the call graph
+  virtual void destroy() {
+    // CallsExternalNode is not in the function map, delete it explicitly.
+    if (CallsExternalNode) {
+      CallsExternalNode->allReferencesDropped();
+      delete CallsExternalNode;
+      CallsExternalNode = 0;
+    }
+    CallGraph::destroy();
+  }
+};
+
+} // End anonymous namespace
+
+static RegisterAnalysisGroup<CallGraph> X("Call Graph");
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+                   "Basic CallGraph Construction", false, true, true);
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+  Mod = &M;
+}
+
+void CallGraph::destroy() {
+  if (FunctionMap.empty()) return;
+
+  // Reset all the nodes' use counts to zero before deleting them to prevent
+  // an assertion from firing.
+#ifndef NDEBUG
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    I->second->allReferencesDropped();
+#endif
+
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    delete I->second;
+  FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+  for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+    I->second->print(OS);
+}
+
+void CallGraph::dump() const {
+  print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it.  Because this removes the function from the module, the call graph node
+// is destroyed.  This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN).  The easiest way to do
+// this is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+  assert(CGN->empty() && "Cannot remove function from call "
+         "graph if it references other functions!");
+  Function *F = CGN->getFunction(); // Get the function for the call graph node
+  delete CGN;                       // Delete the call graph node for this func
+  FunctionMap.erase(F);             // Remove the call graph node from the map
+
+  Mod->getFunctionList().remove(F);
+  return F;
+}
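+
+// A small sketch of walking the computed graph (illustrative only; assumes
+// the enclosing pass declared a dependency on CallGraph in its
+// getAnalysisUsage):
+//
+//   CallGraph &CG = getAnalysis<CallGraph>();
+//   for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I)
+//     if (Function *F = I->second->getFunction())
+//       errs() << F->getName() << " has " << I->second->size()
+//              << " call edges\n";
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.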
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { + CallGraphNode *&CGN = FunctionMap[F]; + if (CGN) return CGN; + + assert((!F || F->getParent() == Mod) && "Function not in current module!"); + return CGN = new CallGraphNode(const_cast<Function*>(F)); +} + +void CallGraphNode::print(raw_ostream &OS) const { + if (Function *F = getFunction()) + OS << "Call graph node for function: '" << F->getName() << "'"; + else + OS << "Call graph node <<null function>>"; + + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; + + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " CS<" << I->first << "> calls "; + if (Function *FI = I->second->getFunction()) + OS << "function '" << FI->getName() <<"'\n"; + else + OS << "external node\n"; + } + OS << '\n'; +} + +void CallGraphNode::dump() const { print(dbgs()); } + +/// removeCallEdgeFor - This method removes the edge in the node for the +/// specified call site. Note that this method takes linear time, so it +/// should be used sparingly. +void CallGraphNode::removeCallEdgeFor(CallSite CS) { + for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { + assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + return; + } + } +} + + +// removeAnyCallEdgeTo - This method removes any call edges from this node to +// the specified callee function. This takes more time to execute than +// removeCallEdgeTo, so it should not be used unless necessary. +void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) { + for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i) + if (CalledFunctions[i].second == Callee) { + Callee->DropRef(); + CalledFunctions[i] = CalledFunctions.back(); + CalledFunctions.pop_back(); + --i; --e; + } +} + +/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite +/// from this node to the specified callee function. +void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { + for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { + assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); + CallRecord &CR = *I; + if (CR.second == Callee && CR.first == 0) { + Callee->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + return; + } + } +} + +/// replaceCallEdge - This method replaces the edge in the node for the +/// specified call site with a new one. Note that this method takes linear +/// time, so it should be used sparingly. 
+void CallGraphNode::replaceCallEdge(CallSite CS,
+                                    CallSite NewCS, CallGraphNode *NewNode){
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (I->first == CS.getInstruction()) {
+      I->second->DropRef();
+      I->first = NewCS.getInstruction();
+      I->second = NewNode;
+      NewNode->AddRef();
+      return;
+    }
+  }
+}
+
+// Ensure that users of CallGraph.h also link with this file.
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 0000000..b7a27cb
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,609 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph.  Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call graph in SCC order: that is, they process functions bottom-up, except
+// for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+  static char ID;
+  explicit CGPassManager(int Depth)
+    : ModulePass(ID), PMDataManager(Depth) { }
+
+  /// run - Execute all of the passes scheduled for execution.  Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool runOnModule(Module &M);
+
+  bool doInitialization(CallGraph &CG);
+  bool doFinalization(CallGraph &CG);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  void getAnalysisUsage(AnalysisUsage &Info) const {
+    // CGPassManager walks SCCs and it needs CallGraph.
+ Info.addRequired<CallGraph>(); + Info.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "CallGraph Pass Manager"; + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + // Print passes managed by this manager + void dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } + } + + Pass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast<Pass *>(PassVector[N]); + } + + virtual PassManagerType getPassManagerType() const { + return PMT_CallGraphPassManager; + } + +private: + bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall); + + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall); + bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, + bool IsCheckingMode); +}; + +} // end anonymous namespace. + +char CGPassManager::ID = 0; + + +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { + bool Changed = false; + PMDataManager *PM = P->getAsPMDataManager(); + + if (PM == 0) { + CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; + if (!CallGraphUpToDate) { + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + { + TimeRegion PassTimer(getPassTimer(CGSP)); + Changed = CGSP->runOnSCC(CurSCC); + } + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. +#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + FPPassManager *FPP = (FPPassManager*)P; + + // Run pass P on all functions in the current SCC. + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. 
+/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap<Value*, CallGraphNode*> CallSites; + + DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + ); + + bool MadeChange = false; + bool DevirtualizedCall = false; + + // Scan all functions in the SCC. + unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. Sync up the call sites in + // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. + CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + !CallSite(I->first)) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS(cast<Value>(I)); + if (!CS || isa<DbgInfoIntrinsic>(I)) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. + DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. 
If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { + CalleeNode = CG.getCallsExternalNode(); + } + + // Update the edge target in CGN. + CGN->replaceCallEdge(CS, CS, CalleeNode); + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + ++NumDirectAdded; + } else { + CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((FunctionNo & 15) == 15) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + + } else { + dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. 
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+                                      bool &DevirtualizedCall) {
+  bool Changed = false;
+
+  // CallGraphUpToDate - Keep track of whether the callgraph is known to be
+  // up-to-date or not.  The CGSCC pass manager runs two types of passes:
+  // CallGraphSCC Passes and other random function passes.  Because other
+  // random function passes are not CallGraph aware, they may clobber the
+  // call graph by introducing new calls or deleting other ones.  This flag
+  // is set to false when we run a function pass so that we know to clean up
+  // the callgraph when we need to run a CGSCCPass again.
+  bool CallGraphUpToDate = true;
+
+  // Run all passes on current SCC.
+  for (unsigned PassNo = 0, e = getNumContainedPasses();
+       PassNo != e; ++PassNo) {
+    Pass *P = getContainedPass(PassNo);
+
+    // If we're in -debug-pass=Executions mode, construct the SCC node list,
+    // otherwise avoid constructing this string as it is expensive.
+    if (isPassDebuggingExecutionsOrMore()) {
+      std::string Functions;
+  #ifndef NDEBUG
+      raw_string_ostream OS(Functions);
+      for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+           I != E; ++I) {
+        if (I != CurSCC.begin()) OS << ", ";
+        (*I)->print(OS);
+      }
+      OS.flush();
+  #endif
+      dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+    }
+    dumpRequiredSet(P);
+
+    initializeAnalysisImpl(P);
+
+    // Actually run this pass on the current SCC.
+    Changed |= RunPassOnSCC(P, CurSCC, CG,
+                            CallGraphUpToDate, DevirtualizedCall);
+
+    if (Changed)
+      dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+    dumpPreservedSet(P);
+
+    verifyPreservedAnalysis(P);
+    removeNotPreservedAnalysis(P);
+    recordAvailableAnalysis(P);
+    removeDeadPasses(P, "", ON_CG_MSG);
+  }
+
+  // If the callgraph was left out of date (because the last pass run was a
+  // function pass), refresh it before we move on to the next SCC.
+  if (!CallGraphUpToDate)
+    DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+  return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+  CallGraph &CG = getAnalysis<CallGraph>();
+  bool Changed = doInitialization(CG);
+
+  // Walk the callgraph in bottom-up SCC order.
+  scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+  CallGraphSCC CurSCC(&CGI);
+  while (!CGI.isAtEnd()) {
+    // Copy the current SCC and increment past it so that the pass can hack
+    // on the SCC if it wants to without invalidating our iterator.
+    std::vector<CallGraphNode*> &NodeVec = *CGI;
+    CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+    ++CGI;
+
+    // At the top level, we run all the passes in this pass manager on the
+    // functions in this SCC.  However, we support iterative compilation in
+    // the case where a function pass devirtualizes a call to a function.
+    // For example, it is very common for a function pass (often GVN or
+    // instcombine) to eliminate the addressing that feeds into a call.  With
+    // that improved information, we would like the call to be an inline
+    // candidate, infer mod-ref information etc.
+    //
+    // Because of this, we allow iteration up to a specified iteration count.
+    // This only happens in the case of a devirtualized call, so we only burn
+    // compile time in the case that we're making progress.  We also have a
+    // hard iteration count limit in case there is crazy code.
+ unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); + + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); + + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; + + } + Changed |= doFinalization(CG); + return Changed; +} + + +/// Initialize CG +bool CGPassManager::doInitialization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG); + } + } + return Changed; +} + +/// Finalize CG +bool CGPassManager::doFinalization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG); + } + } + return Changed; +} + +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; + CGI->ReplaceNode(Old, New); +} + + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + +/// Assign pass manager to manage this pass. +void CallGraphSCCPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find CGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) + PMS.pop(); + + assert(!PMS.empty() && "Unable to handle Call Graph Pass"); + CGPassManager *CGP; + + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) + CGP = (CGPassManager*)PMS.top(); + else { + // Create new Call Graph SCC Pass Manager if it does not exist. 
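+    // The four numbered steps below follow the usual on-the-fly pass manager
+    // creation recipe: construct the manager, register it with the top-level
+    // manager, schedule it, and push it onto the PMStack so it becomes the
+    // target for subsequently scheduled passes.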
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + CGP = new CGPassManager(PMD->getDepth() + 1); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(CGP); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = CGP; + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(CGP); + } + + CGP->add(this); +} + +/// getAnalysisUsage - For this class, we declare that we require and preserve +/// the call graph. If the derived class implements this method, it should +/// always explicitly call the implementation here. +void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<CallGraph>(); + AU.addPreserved<CallGraph>(); +} + + +//===----------------------------------------------------------------------===// +// PrintCallGraphPass Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. + /// + class PrintCallGraphPass : public CallGraphSCCPass { + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + + public: + static char ID; + PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {} + PrintCallGraphPass(const std::string &B, raw_ostream &o) + : CallGraphSCCPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnSCC(CallGraphSCC &SCC) { + Out << Banner; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + (*I)->getFunction()->print(Out); + return false; + } + }; + +} // end anonymous namespace. + +char PrintCallGraphPass::ID = 0; + +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintCallGraphPass(Banner, O); +} + diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp new file mode 100644 index 0000000..8eed9d6 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp @@ -0,0 +1,103 @@ +//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to seek out all of the types in use by the program. Note +// that this analysis explicitly does not include types only used by the symbol +// table. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +char FindUsedTypes::ID = 0; +INITIALIZE_PASS(FindUsedTypes, "print-used-types", + "Find Used Types", false, true); + +// IncorporateType - Incorporate one type and all of its subtypes into the +// collection of used types. +// +void FindUsedTypes::IncorporateType(const Type *Ty) { + // If ty doesn't already exist in the used types map, add it now, otherwise + // return. 
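+  // (std::set::insert returns a pair<iterator, bool>; ".second" is true only
+  // when the element was newly inserted, so a false result means we have
+  // already seen this type.)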
+ if (!UsedTypes.insert(Ty).second) return; // Already contain Ty. + + // Make sure to add any types this type references now. + // + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) + IncorporateType(*I); +} + +void FindUsedTypes::IncorporateValue(const Value *V) { + IncorporateType(V->getType()); + + // If this is a constant, it could be using other types... + if (const Constant *C = dyn_cast<Constant>(V)) { + if (!isa<GlobalValue>(C)) + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) + IncorporateValue(*OI); + } +} + + +// run - This incorporates all types used by the specified module +// +bool FindUsedTypes::runOnModule(Module &m) { + UsedTypes.clear(); // reset if run multiple times... + + // Loop over global variables, incorporating their types + for (Module::const_global_iterator I = m.global_begin(), E = m.global_end(); + I != E; ++I) { + IncorporateType(I->getType()); + if (I->hasInitializer()) + IncorporateValue(I->getInitializer()); + } + + for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) { + IncorporateType(MI->getType()); + const Function &F = *MI; + + // Loop over all of the instructions in the function, adding their return + // type as well as the types of their operands. + // + for (const_inst_iterator II = inst_begin(F), IE = inst_end(F); + II != IE; ++II) { + const Instruction &I = *II; + + IncorporateType(I.getType()); // Incorporate the type of the instruction + for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); + OI != OE; ++OI) + IncorporateValue(*OI); // Insert inst operand types as well + } + } + + return false; +} + +// Print the types found in the module. If the optional Module parameter is +// passed in, then the types are printed symbolically if possible, using the +// symbol table from the module. +// +void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { + OS << "Types in use by this module:\n"; + for (std::set<const Type *>::const_iterator I = UsedTypes.begin(), + E = UsedTypes.end(); I != E; ++I) { + OS << " "; + WriteTypeSymbolic(OS, *I, M); + OS << '\n'; + } +} diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp new file mode 100644 index 0000000..6759b0a --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -0,0 +1,586 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. 
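+//
+// For example (an illustrative case, not output of this pass): given
+//
+//   static int G;
+//   void f(int *P) { G = 1; *P = 2; }
+//
+// if G's address is never taken, the pass can conclude that P cannot point
+// at G, so the store through P does not modify G.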
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalsmodref-aa" +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SCCIterator.h" +#include <set> +using namespace llvm; + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +namespace { + /// FunctionRecord - One instance of this structure is stored for every + /// function in the program. Later, the entries for these functions are + /// removed if the function is found to call an external function (in which + /// case we know nothing about it. + struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. + std::map<const GlobalValue*, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue*, unsigned>::const_iterator I = + GlobalInfo.find(GV); + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } + + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; + + FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} + }; + + /// GlobalsModRef - The actual analysis pass. + class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue*> NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue*> IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. 
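+    /// Each entry is a bitmask of AliasAnalysis::Ref and AliasAnalysis::Mod,
+    /// so a typical query against a record FR looks like:
+    ///
+    ///   if (FR.getInfoForGlobal(GV) & AliasAnalysis::Mod)
+    ///     ...;  // the function may write GV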
+ std::map<const Function*, FunctionRecord> FunctionInfo; + + public: + static char ID; + GlobalsModRef() : ModulePass(ID) {} + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + AnalyzeGlobals(M); // find non-addr taken globals + AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraph>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(const Function *F) { + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + return DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + return OnlyReadsMemory; + } + return AliasAnalysis::getModRefBehavior(F); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + const Function* F = CS.getCalledFunction(); + if (!F) return AliasAnalysis::getModRefBehavior(CS); + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + return DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + return OnlyReadsMemory; + } + return AliasAnalysis::getModRefBehavior(CS); + } + + virtual void deleteValue(Value *V); + virtual void copyValue(Value *From, Value *To); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. + FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function*, FunctionRecord>::iterator I = + FunctionInfo.find(F); + if (I != FunctionInfo.end()) + return &I->second; + return 0; + } + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest = 0); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); + }; +} + +char GlobalsModRef::ID = 0; +INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false); + +Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program. 
If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. +void GlobalsModRef::AnalyzeGlobals(Module &M) { + std::vector<Function*> Readers, Writers; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global. + NonAddressTakenGlobals.insert(I); + ++NumNonAddrTakenFunctions; + } + Readers.clear(); Writers.clear(); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(I); + + for (unsigned i = 0, e = Readers.size(); i != e; ++i) + FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; + + if (!I->isConstant()) // No need to keep track of writers to constants + for (unsigned i = 0, e = Writers.size(); i != e; ++i) + FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (I->getType()->getElementType()->isPointerTy() && + AnalyzeIndirectGlobalMemory(I)) + ++NumIndirectGlobalVars; + } + Readers.clear(); Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. +bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, + std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest) { + if (!V->getType()->isPointerTy()) return true; + + for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + Readers.push_back(LI->getParent()->getParent()); + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (V == SI->getOperand(1)) { + Writers.push_back(SI->getParent()->getParent()); + } else if (SI->getOperand(1) != OkayStoreDest) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + return true; + } else if (isFreeCall(U)) { + Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + if (CI->getArgOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. 
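+      // E.g. "invoke void @f()" merely calls @f (fine), whereas
+      // "invoke void @g(i8* @f)" passes @f's address along, so it escapes.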
+      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+        if (II->getArgOperand(i) == V) return true;
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+      if (CE->getOpcode() == Instruction::GetElementPtr ||
+          CE->getOpcode() == Instruction::BitCast) {
+        if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+          return true;
+      } else {
+        return true;
+      }
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return true;  // Allow comparison against null.
+    } else {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type.  See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere.  If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+  // Keep track of values related to the allocation of the memory, e.g. the
+  // value produced by the malloc call and any casts.
+  std::vector<Value*> AllocRelatedValues;
+
+  // Walk the user list of the global.  If we find anything other than a direct
+  // load or store, bail out.
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+    User *U = *I;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // The pointer loaded from the global can only be used in simple ways:
+      // we allow addressing of it and loading from and storing to it.  We do
+      // *not* allow storing the loaded pointer somewhere else or passing it
+      // to a function.
+      std::vector<Function*> ReadersWriters;
+      if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+        return false;  // Loaded pointer escapes.
+      // TODO: Could try some IP mod/ref of the loaded pointer.
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // Storing the global itself.
+      if (SI->getOperand(0) == GV) return false;
+
+      // If storing the null pointer, ignore it.
+      if (isa<ConstantPointerNull>(SI->getOperand(0)))
+        continue;
+
+      // Check the value being stored.
+      Value *Ptr = SI->getOperand(0)->getUnderlyingObject();
+
+      if (isMalloc(Ptr)) {
+        // Okay, easy case.
+      } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
+        Function *F = CI->getCalledFunction();
+        if (!F || !F->isDeclaration()) return false;  // Too hard to analyze.
+        if (F->getName() != "calloc") return false;   // Not calloc.
+      } else {
+        return false;  // Too hard to analyze.
+      }
+
+      // Analyze all uses of the allocation.  If any of them are used in a
+      // non-simple way (e.g. stored to another global) bail out.
+      std::vector<Function*> ReadersWriters;
+      if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV))
+        return false;  // Allocation escapes.
+
+      // Remember that this allocation is related to the indirect global.
+      AllocRelatedValues.push_back(Ptr);
+    } else {
+      // Something complex, bail out.
+      return false;
+    }
+  }
+
+  // Okay, this is an indirect global.  Remember all of the allocations for
+  // this global in AllocsForIndirectGlobals.
+ while (!AllocRelatedValues.empty()) { + AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; + AllocRelatedValues.pop_back(); + } + IndirectGlobals.insert(GV); + return true; +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E; + ++I) { + std::vector<CallGraphNode *> &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction()) { + // Calls externally - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; + + bool KnowNothing = false; + unsigned FunctionEffect = 0; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. + for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FunctionEffect |= Ref; + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FR.MayReadAnyGlobal = true; + } else { + FunctionEffect |= ModRef; + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. + KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { + // Propagate function effect up. + FunctionEffect |= CalleeFR->FunctionEffect; + + // Incorporate callee's effects on globals into our info. + for (std::map<const GlobalValue*, unsigned>::iterator GI = + CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); + GI != E; ++GI) + FR.GlobalInfo[GI->first] |= GI->second; + FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfo map. + if (KnowNothing) { + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. 
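+    // Note the volatile special case below: a volatile load is also counted
+    // as a write, and a volatile store as a read, so a function whose only
+    // memory access is volatile still ends up with both Mod and Ref set.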
+ for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (inst_iterator II = inst_begin(SCC[i]->getFunction()), + E = inst_end(SCC[i]->getFunction()); + II != E && FunctionEffect != ModRef; ++II) + if (isa<LoadInst>(*II)) { + FunctionEffect |= Ref; + if (cast<LoadInst>(*II).isVolatile()) + // Volatile loads may have side-effects, so mark them as writing + // memory (for example, a flag inside the processor). + FunctionEffect |= Mod; + } else if (isa<StoreInst>(*II)) { + FunctionEffect |= Mod; + if (cast<StoreInst>(*II).isVolatile()) + // Treat volatile stores as reading memory somewhere. + FunctionEffect |= Ref; + } else if (isMalloc(&cast<Instruction>(*II)) || + isFreeCall(&cast<Instruction>(*II))) { + FunctionEffect |= ModRef; + } + + if ((FunctionEffect & Mod) == 0) + ++NumReadMemFunctions; + if (FunctionEffect == 0) + ++NumNoMemFunctions; + FR.FunctionEffect = FunctionEffect; + + // Finally, now that we know the full effect on this SCC, clone the + // information to each function in the SCC. + for (unsigned i = 1, e = SCC.size(); i != e; ++i) + FunctionInfo[SCC[i]->getFunction()] = FR; + } +} + + + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasAnalysis::AliasResult +GlobalsModRef::alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + // Get the base object these pointers point to. + const Value *UV1 = V1->getUnderlyingObject(); + const Value *UV2 = V2->getUnderlyingObject(); + + // If either of the underlying values is a global, they may be non-addr-taken + // globals, which we can answer queries about. + const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); + const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + + // If the two pointers are derived from two different non-addr-taken + // globals, or if one is and the other isn't, we know these can't alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. + GV1 = GV2 = 0; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV1 = GV; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV2 = GV; + + // These pointers may also be from an allocation for the indirect global. If + // so, also handle them. + if (AllocsForIndirectGlobals.count(UV1)) + GV1 = AllocsForIndirectGlobals[UV1]; + if (AllocsForIndirectGlobals.count(UV2)) + GV2 = AllocsForIndirectGlobals[UV2]; + + // Now that we know whether the two pointers are related to indirect globals, + // use this to disambiguate the pointers. If either pointer is based on an + // indirect global and if they are not both based on the same indirect global, + // they cannot alias. 
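+  // For instance (hypothetical IR): if P1 is a load of indirect global @G1
+  // and P2 is a load of indirect global @G2, then GV1 == @G1 != @G2 == GV2
+  // at this point, and the check below returns NoAlias.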
+ if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); +} + +AliasAnalysis::ModRefResult +GlobalsModRef::getModRefInfo(ImmutableCallSite CS, + const Value *P, unsigned Size) { + unsigned Known = ModRef; + + // If we are asking for mod/ref info of a direct call with a pointer to a + // global we are tracking, return information if we have it. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject())) + if (GV->hasLocalLinkage()) + if (const Function *F = CS.getCalledFunction()) + if (NonAddressTakenGlobals.count(GV)) + if (const FunctionRecord *FR = getFunctionInfo(F)) + Known = FR->getInfoForGlobal(GV); + + if (Known == NoModRef) + return NoModRef; // No need to query other mod/ref analyses + return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, P, Size)); +} + + +//===----------------------------------------------------------------------===// +// Methods to update the analysis as a result of the client transformation. +// +void GlobalsModRef::deleteValue(Value *V) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and remove + // any AllocRelatedValues for it. + if (IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (std::map<const Value*, const GlobalValue*>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); I != E; ) { + if (I->second == GV) { + AllocsForIndirectGlobals.erase(I++); + } else { + ++I; + } + } + } + } + } + + // Otherwise, if this is an allocation related to an indirect global, remove + // it. + AllocsForIndirectGlobals.erase(V); + + AliasAnalysis::deleteValue(V); +} + +void GlobalsModRef::copyValue(Value *From, Value *To) { + AliasAnalysis::copyValue(From, To); +} diff --git a/contrib/llvm/lib/Analysis/IPA/Makefile b/contrib/llvm/lib/Analysis/IPA/Makefile new file mode 100644 index 0000000..b850c9f --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/Makefile @@ -0,0 +1,15 @@ +##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMipa +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp new file mode 100644 index 0000000..cdf667a --- /dev/null +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -0,0 +1,255 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. 
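+//
+// For example (an illustrative C loop), in
+//
+//   for (i = 0; i != n; ++i)
+//     A[i] = 0;
+//
+// the address &A[i] is an expression of the induction variable i (a SCEV
+// add-recurrence, roughly {&A[0],+,4} for a 4-byte element type), and the
+// store is recorded as an interesting user of that expression.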
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +char IVUsers::ID = 0; +INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true); + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE) { + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides. + if (AR->getLoop() == L) + return AR->isAffine() || !L->contains(I); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. + return isInteresting(AR->getStart(), I, L, SE) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE); + } + + // An add is interesting if exactly one of its operands is interesting. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + bool AnyInterestingYet = false; + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L, SE)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; + } + + // Nothing else is interesting here. + return false; +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + if (SE->getTypeSizeInBits(I->getType()) > 64) + return false; + + if (!Processed.insert(I)) + return true; // Instruction already handled. + + // Get the symbolic expression for this instruction. + const SCEV *ISE = SE->getSCEV(I); + + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L, SE)) + return false; + + SmallPtrSet<Instruction *, 4> UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa<PHINode>(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references outside the loop in all cases. 
+ // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa<PHINode>(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + } + } + return true; +} + +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, User, Operand)); + return IVUses.back(); +} + +IVUsers::IVUsers() + : LoopPass(ID) { +} + +void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { + + L = l; + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + SE = &getAnalysis<ScalarEvolution>(); + + // Find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) + (void)AddUsersIfInteresting(I); + + return false; +} + +void IVUsers::print(raw_ostream &OS, const Module *M) const { + OS << "IV Users for loop "; + WriteAsOperand(OS, L->getHeader(), false); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << " with backedge-taken count " + << *SE->getBackedgeTakenCount(L); + } + OS << ":\n"; + + for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), + E = IVUses.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = " << *getReplacementExpr(*UI); + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } + OS << " in "; + UI->getUser()->print(OS); + OS << '\n'; + } +} + +void IVUsers::dump() const { + print(dbgs()); +} + +void IVUsers::releaseMemory() { + Processed.clear(); + IVUses.clear(); +} + +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace. +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { + return SE->getSCEV(IU.getOperandValToReplace()); +} + +/// getExpr - Return the expression for the use. 
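+/// The result is normalized with respect to the use's post-increment loops
+/// (via TransformForPostIncUse), so that uses of an IV before and after its
+/// increment can be compared in a uniform form.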
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { + return + TransformForPostIncUse(Normalize, getReplacementExpr(IU), + IU.getUser(), IU.getOperandValToReplace(), + const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), + *SE, *DT); +} + +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) + return AR->getStepRecurrence(*SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoops.insert(L); +} + +void IVStrideUse::deleted() { + // Remove this user from the list. + Parent->IVUses.erase(this); + // this now dangles! +} diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp new file mode 100644 index 0000000..3e550f3 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -0,0 +1,482 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/CallingConv.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned InlineCostAnalyzer::FunctionInfo:: +CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { + // We will be able to eliminate all but one of the successors. + const TerminatorInst &TI = cast<TerminatorInst>(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)]; + // We don't know which blocks will be eliminated, so use the average size. + Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // Turning an indirect call into a direct call is a BIG win + if (CI->getCalledValue() == V) + Reduction += InlineConstants::IndirectCallBonus; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + // Turning an indirect call into a direct call is a BIG win + if (II->getCalledValue() == V) + Reduction += InlineConstants::IndirectCallBonus; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast<Instruction>(*U); + + // We can't constant propagate instructions which have effects or + // read memory. 
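+      // (E.g. "icmp eq i32 %V, 0" folds away once %V is constant, but
+      // "load i32* %p" does not, even for a constant %p, because the load
+      // reads memory.)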
+ // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocaInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += InlineConstants::InstrCost; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + } + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned InlineCostAnalyzer::FunctionInfo:: + CountCodeReductionForAlloca(Value *V) { + if (!V->getType()->isPointerTy()) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast<Instruction>(*UI); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += InlineConstants::InstrCost; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + // Track pointer through bitcasts. + Reduction += CountCodeReductionForAlloca(BCI); + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa<PHINode>(II)) continue; // PHI nodes don't count. + + // Special handling for calls. 
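+    // Below, debug intrinsics are free, calls to setjmp/_setjmp or to the
+    // function itself set flags that block inlining, and an ordinary call is
+    // charged one instruction per argument in addition to the call itself.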
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + if (isa<DbgInfoIntrinsic>(II)) + continue; // Debug intrinsics don't count as size. + + ImmutableCallSite CS(cast<Instruction>(II)); + + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (const Function *F = CS.getCalledFunction()) { + if (F->isDeclaration() && + (F->getName() == "setjmp" || F->getName() == "_setjmp")) + callsSetJmp = true; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + if (const CastInst *CI = dyn_cast<CastInst>(II)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + isa<PtrToIntInst>(CI)) + continue; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + continue; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (GEPI->hasAllConstantIndices()) + continue; + } + + ++NumInsts; + } + + if (isa<ReturnInst>(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + if (isa<IndirectBrInst>(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void CodeMetrics::analyzeFunction(Function *F) { + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB); +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { + Metrics.analyzeFunction(F); + + // A function with exactly one return has it removed during the inlining + // process (see InlineFunction), so don't count it. + // FIXME: This knowledge should really be encoded outside of FunctionInfo. 
+  if (Metrics.NumRets==1)
+    --Metrics.NumInsts;
+
+  // Don't bother calculating argument weights if we are never going to inline
+  // the function anyway.
+  if (NeverInline())
+    return;
+
+  // Check out all of the arguments to the function, figuring out how much
+  // code can be eliminated if one of the arguments is a constant.
+  ArgumentWeights.reserve(F->arg_size());
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
+    ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I),
+                                      CountCodeReductionForAlloca(I)));
+}
+
+/// NeverInline - returns true if the function should never be inlined into
+/// any caller.
+bool InlineCostAnalyzer::FunctionInfo::NeverInline()
+{
+  return (Metrics.callsSetJmp || Metrics.isRecursive ||
+          Metrics.containsIndirectBr);
+}
+
+// getInlineCost - The heuristic used to determine if we should inline the
+// function call or not.
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               Function *Callee,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  Instruction *TheCall = CS.getInstruction();
+  Function *Caller = TheCall->getParent()->getParent();
+  bool isDirectCall = CS.getCalledFunction() == Callee;
+
+  // Don't inline functions which can be redefined at link-time to mean
+  // something else.  Don't inline functions marked noinline or call sites
+  // marked noinline.
+  if (Callee->mayBeOverridden() ||
+      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+      CS.isNoInline())
+    return llvm::InlineCost::getNever();
+
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is.  A lower inline cost makes it more likely that the call will be
+  // inlined.  This value may go negative.
+  //
+  int InlineCost = 0;
+
+  // If there is only one call of the function, and it has internal linkage,
+  // make it almost guaranteed to be inlined.
+  //
+  if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
+    InlineCost += InlineConstants::LastCallToStaticBonus;
+
+  // If this function uses the coldcc calling convention, prefer not to inline
+  // it.
+  if (Callee->getCallingConv() == CallingConv::Cold)
+    InlineCost += InlineConstants::ColdccPenalty;
+
+  // If the instruction after the call, or if the normal destination of the
+  // invoke is an unreachable instruction, the function is noreturn.  As such,
+  // there is little point in inlining this.
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+      InlineCost += InlineConstants::NoreturnPenalty;
+  } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
+    InlineCost += InlineConstants::NoreturnPenalty;
+
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
+  // If we should never inline this, return a huge cost.
+  if (CalleeFI->NeverInline())
+    return InlineCost::getNever();
+
+  // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
+  // could move this up and avoid computing the FunctionInfo for
+  // things we are going to just return always inline for. This
+  // requires handling setjmp somewhere else, however.
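+  //
+  // A rough worked example of the accounting below, using InstrCost = 5 (as
+  // noted below) and assuming CallPenalty = 25 from InlineConstants: a callee
+  // with 20 instructions, one call, and a single constant argument of weight
+  // 10 scores -5 (argument setup) - 10 (constant weight) + 25 (call penalty)
+  // + 100 (20 insts * 5) = 110.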
+  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+    return InlineCost::getAlways();
+
+  if (CalleeFI->Metrics.usesDynamicAlloca) {
+    // Get information about the caller.
+    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+    // If we haven't calculated this information yet, do so now.
+    if (CallerFI.Metrics.NumBlocks == 0) {
+      CallerFI.analyzeFunction(Caller);
+
+      // Recompute the CalleeFI pointer, getting Caller could have invalidated
+      // it.
+      CalleeFI = &CachedFunctionInfo[Callee];
+    }
+
+    // Don't inline a callee with dynamic alloca into a caller without them.
+    // Functions containing dynamic allocas are inefficient in various ways;
+    // don't create more inefficiency.
+    if (!CallerFI.Metrics.usesDynamicAlloca)
+      return InlineCost::getNever();
+  }
+
+  // Add to the inline quality for properties that make the call valuable to
+  // inline.  This includes factors that indicate that the result of inlining
+  // the function will be optimizable.  Currently this just looks at arguments
+  // passed into the function.
+  //
+  unsigned ArgNo = 0;
+  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I, ++ArgNo) {
+    // Each argument passed in has a cost at both the caller and the callee
+    // sides.  Measurements show that each argument costs about the same as an
+    // instruction.
+    InlineCost -= InlineConstants::InstrCost;
+
+    // If an alloca is passed in, inlining this function is likely to allow
+    // significant future optimization possibilities (like scalar promotion,
+    // and scalarization), so encourage the inlining of the function.
+    //
+    if (isa<AllocaInst>(I)) {
+      if (ArgNo < CalleeFI->ArgumentWeights.size())
+        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
+
+      // If this is a constant being passed into the function, use the argument
+      // weights calculated for the callee to determine how much will be folded
+      // away with this information.
+    } else if (isa<Constant>(I)) {
+      if (ArgNo < CalleeFI->ArgumentWeights.size())
+        InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+    }
+  }
+
+  // Now that we have considered all of the factors that make the call site
+  // more likely to be inlined, look at factors that make us not want to
+  // inline it.
+
+  // Calls usually take a long time, so they make the inlining gain smaller.
+  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+  // Look at the size of the callee.  Each instruction counts as 5.
+  InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+
+  return llvm::InlineCost::get(InlineCost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+
+  // Get information about the callee.
+  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI.Metrics.NumBlocks == 0)
+    CalleeFI.analyzeFunction(Callee);
+
+  float Factor = 1.0f;
+  // Single BB functions are often written to be inlined.
+  if (CalleeFI.Metrics.NumBlocks == 1)
+    Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk (at least 10%
+  // of the instructions) of vector instructions.
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) + Factor += 2.0f; + else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) + Factor += 1.5f; + return Factor; +} + +/// growCachedCostInfo - update the cached cost info for Caller after Callee has +/// been inlined. +void +InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { + CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; + + // For small functions we prefer to recalculate the cost for better accuracy. + if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) { + resetCachedCostInfo(Caller); + return; + } + + // For large functions, we can save a lot of computation time by skipping + // recalculations. + if (CallerMetrics.NumCalls > 0) + --CallerMetrics.NumCalls; + + if (Callee == 0) return; + + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + + // If we don't have metrics for the callee, don't recalculate them just to + // update an approximation in the caller. Instead, just recalculate the + // caller info from scratch. + if (CalleeMetrics.NumBlocks == 0) { + resetCachedCostInfo(Caller); + return; + } + + // Since CalleeMetrics were already calculated, we know that the CallerMetrics + // reference isn't invalidated: both were in the DenseMap. + CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; + + // FIXME: If any of these three are true for the callee, the callee was + // not inlined into the caller, so I think they're redundant here. + CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; + CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; + CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; + + CallerMetrics.NumInsts += CalleeMetrics.NumInsts; + CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; + CallerMetrics.NumCalls += CalleeMetrics.NumCalls; + CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; + CallerMetrics.NumRets += CalleeMetrics.NumRets; + + // analyzeBasicBlock counts each function argument as an inst. + if (CallerMetrics.NumInsts >= Callee->arg_size()) + CallerMetrics.NumInsts -= Callee->arg_size(); + else + CallerMetrics.NumInsts = 0; + + // We are not updating the argument weights. We have already determined that + // Caller is a fairly large function, so we accept the loss of precision. +} + +/// clear - empty the cache of inline costs +void InlineCostAnalyzer::clear() { + CachedFunctionInfo.clear(); +} diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp new file mode 100644 index 0000000..dcbcac0 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstCount.cpp @@ -0,0 +1,85 @@ +//===-- InstCount.cpp - Collects the count of all instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass collects the count of all instructions and reports them +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instcount" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(TotalInsts , "Number of instructions (of all types)"); +STATISTIC(TotalBlocks, "Number of basic blocks"); +STATISTIC(TotalFuncs , "Number of non-external functions"); +STATISTIC(TotalMemInst, "Number of memory instructions"); + +#define HANDLE_INST(N, OPCODE, CLASS) \ + STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); + +#include "llvm/Instruction.def" + + +namespace { + class InstCount : public FunctionPass, public InstVisitor<InstCount> { + friend class InstVisitor<InstCount>; + + void visitFunction (Function &F) { ++TotalFuncs; } + void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; } + +#define HANDLE_INST(N, OPCODE, CLASS) \ + void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } + +#include "llvm/Instruction.def" + + void visitInstruction(Instruction &I) { + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(0); + } + public: + static char ID; // Pass identification, replacement for typeid + InstCount() : FunctionPass(ID) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + }; +} + +char InstCount::ID = 0; +INITIALIZE_PASS(InstCount, "instcount", + "Counts the various types of Instructions", false, true); + +FunctionPass *llvm::createInstCountPass() { return new InstCount(); } + +// InstCount::run - This is the main Analysis entry point for a +// function. +// +bool InstCount::runOnFunction(Function &F) { + unsigned StartMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + visit(F); + unsigned EndMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + TotalMemInst += EndMemInsts-StartMemInsts; + return false; +} diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp new file mode 100644 index 0000000..24cd343 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -0,0 +1,486 @@ +//===- InstructionSimplify.cpp - Fold instruction operands ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements routines for folding instructions into simpler forms +// that do not require creating new instructions. For example, this does +// constant folding, and can handle identities like (X&0)->0. 
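+//
+// For example (an illustrative use, not an exhaustive contract): given
+//   %y = and i32 %x, 0
+// SimplifyAndInst recognizes the X & 0 = 0 identity and returns the existing
+// constant 'i32 0'; the caller can then replace all uses of %y with that
+// constant without allocating any new IR.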
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Instructions.h" +#include "llvm/Support/PatternMatch.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +/// SimplifyAddInst - Given operands for an Add, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + if (Constant *Op1C = dyn_cast<Constant>(Op1)) { + // X + undef -> undef + if (isa<UndefValue>(Op1C)) + return Op1C; + + // X + 0 --> X + if (Op1C->isNullValue()) + return Op0; + } + + // FIXME: Could pull several more out of instcombine. + return 0; +} + +/// SimplifyAndInst - Given operands for an And, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X & undef -> 0 + if (isa<UndefValue>(Op1)) + return Constant::getNullValue(Op0->getType()); + + // X & X = X + if (Op0 == Op1) + return Op0; + + // X & <0,0> = <0,0> + if (isa<ConstantAggregateZero>(Op1)) + return Op1; + + // X & <-1,-1> = X + if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) + if (CP->isAllOnesValue()) + return Op0; + + if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { + // X & 0 = 0 + if (Op1CI->isZero()) + return Op1CI; + // X & -1 = X + if (Op1CI->isAllOnesValue()) + return Op0; + } + + // A & ~A = ~A & A = 0 + Value *A, *B; + if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || + (match(Op1, m_Not(m_Value(A))) && A == Op0)) + return Constant::getNullValue(Op0->getType()); + + // (A | ?) & A = A + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A & (A | ?) = A + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + return 0; +} + +/// SimplifyOrInst - Given operands for an Or, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. 
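+    // (For example, "or i32 7, %x" is handled as "or i32 %x, 7" from here
+    // on, so the checks below only have to look for a constant on the RHS.)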
+ std::swap(Op0, Op1); + } + + // X | undef -> -1 + if (isa<UndefValue>(Op1)) + return Constant::getAllOnesValue(Op0->getType()); + + // X | X = X + if (Op0 == Op1) + return Op0; + + // X | <0,0> = X + if (isa<ConstantAggregateZero>(Op1)) + return Op0; + + // X | <-1,-1> = <-1,-1> + if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) + if (CP->isAllOnesValue()) + return Op1; + + if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) { + // X | 0 = X + if (Op1CI->isZero()) + return Op0; + // X | -1 = -1 + if (Op1CI->isAllOnesValue()) + return Op1CI; + } + + // A | ~A = ~A | A = -1 + Value *A, *B; + if ((match(Op0, m_Not(m_Value(A))) && A == Op1) || + (match(Op1, m_Not(m_Value(A))) && A == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + + // (A & ?) | A = A + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A | (A & ?) = A + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + return 0; +} + + +static const Type *GetCompareTy(Value *Op) { + return CmpInst::makeCmpResultType(Op->getType()); +} + + +/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // ITy - This is the return type of the compare we're considering. + const Type *ITy = GetCompareTy(LHS); + + // icmp X, X -> true/false + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value + // addresses never equal each other! We already know that Op0 != Op1. + if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) || + isa<ConstantPointerNull>(LHS)) && + (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || + isa<ConstantPointerNull>(RHS))) + return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + + // See if we are doing a comparison with a constant. + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // If we have an icmp le or icmp ge instruction, turn it into the + // appropriate icmp lt or icmp gt instruction. This allows us to rely on + // them being folded in the code below. + switch (Pred) { + default: break; + case ICmpInst::ICMP_ULE: + if (CI->isMaxValue(false)) // A <=u MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_SLE: + if (CI->isMaxValue(true)) // A <=s MAX -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_UGE: + if (CI->isMinValue(false)) // A >=u MIN -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + case ICmpInst::ICMP_SGE: + if (CI->isMinValue(true)) // A >=s MIN -> TRUE + return ConstantInt::getTrue(CI->getContext()); + break; + } + } + + + return 0; +} + +/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can +/// fold the result. If not, this returns null. 
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // Fold trivial predicates. + if (Pred == FCmpInst::FCMP_FALSE) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_TRUE) + return ConstantInt::get(GetCompareTy(LHS), 1); + + if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef + return UndefValue::get(GetCompareTy(LHS)); + + // fcmp x,x -> true/false. Not all compares are foldable. + if (LHS == RHS) { + if (CmpInst::isTrueWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 1); + if (CmpInst::isFalseWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 0); + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // If the constant is a nan, see if we can fold the comparison based on it. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. + return ConstantInt::getTrue(CFP->getContext()); + } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } + } + } + } + + return 0; +} + +/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold +/// the result. If not, this returns null. +Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, + const TargetData *TD) { + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) + return CB->getZExtValue() ? TrueVal : FalseVal; + + // select C, X, X -> X + if (TrueVal == FalseVal) + return TrueVal; + + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return FalseVal; + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return TrueVal; + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return TrueVal; + return FalseVal; + } + + + + return 0; +} + + +/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, + const TargetData *TD) { + // getelementptr P -> P. + if (NumOps == 1) + return Ops[0]; + + // TODO. 
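+  // (The disabled sketch below would fold a getelementptr whose base pointer
+  // is undef to undef, since offsetting an undef pointer yields undef.)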
+ //if (isa<UndefValue>(Ops[0])) + // return UndefValue::get(GEP.getType()); + + // getelementptr P, 0 -> P. + if (NumOps == 2) + if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) + if (C->isZero()) + return Ops[0]; + + // Check to see if this is constant foldable. + for (unsigned i = 0; i != NumOps; ++i) + if (!isa<Constant>(Ops[i])) + return 0; + + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), + (Constant *const*)Ops+1, NumOps-1); +} + + +//=== Helper functions for higher up the class hierarchy. + +/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD) { + switch (Opcode) { + case Instruction::And: return SimplifyAndInst(LHS, RHS, TD); + case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD); + default: + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *COps[] = {CLHS, CRHS}; + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); + } + return 0; + } +} + +/// SimplifyCmpInst - Given operands for a CmpInst, see if we can +/// fold the result. +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD) { + if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) + return SimplifyICmpInst(Predicate, LHS, RHS, TD); + return SimplifyFCmpInst(Predicate, LHS, RHS, TD); +} + + +/// SimplifyInstruction - See if we can compute a simplified version of this +/// instruction. If not, this returns null. +Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { + switch (I->getOpcode()) { + default: + return ConstantFoldInstruction(I, TD); + case Instruction::Add: + return SimplifyAddInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), TD); + case Instruction::And: + return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD); + case Instruction::Or: + return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD); + case Instruction::ICmp: + return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD); + case Instruction::FCmp: + return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD); + case Instruction::Select: + return SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD); + case Instruction::GetElementPtr: { + SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); + return SimplifyGEPInst(&Ops[0], Ops.size(), TD); + } + } +} + +/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then +/// delete the From instruction. In addition to a basic RAUW, this does a +/// recursive simplification of the newly formed instructions. This catches +/// things where one simplification exposes other opportunities. This only +/// simplifies and deletes scalar operations, it does not change the CFG. +/// +void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, + const TargetData *TD) { + assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); + + // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that + // we can know if it gets deleted out from under us or replaced in a + // recursive simplification. + WeakVH FromHandle(From); + WeakVH ToHandle(To); + + while (!From->use_empty()) { + // Update the instruction to use the new value. 
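+    // (Note that assigning To into this Use below also unlinks the use from
+    // From's use list, so the enclosing use_empty() loop always makes
+    // progress, even when the user doesn't simplify away.)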
+ Use &TheUse = From->use_begin().getUse(); + Instruction *User = cast<Instruction>(TheUse.getUser()); + TheUse = To; + + // Check to see if the instruction can be folded due to the operand + // replacement. For example changing (or X, Y) into (or X, -1) can replace + // the 'or' with -1. + Value *SimplifiedVal; + { + // Sanity check to make sure 'User' doesn't dangle across + // SimplifyInstruction. + AssertingVH<> UserHandle(User); + + SimplifiedVal = SimplifyInstruction(User, TD); + if (SimplifiedVal == 0) continue; + } + + // Recursively simplify this user to the new value. + ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD); + From = dyn_cast_or_null<Instruction>((Value*)FromHandle); + To = ToHandle; + + assert(ToHandle && "To value deleted by recursive simplification?"); + + // If the recursive simplification ended up revisiting and deleting + // 'From' then we're done. + if (From == 0) + return; + } + + // If 'From' has value handles referring to it, do a real RAUW to update them. + From->replaceAllUsesWith(To); + + From->eraseFromParent(); +} + diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp new file mode 100644 index 0000000..ca9cdca --- /dev/null +++ b/contrib/llvm/lib/Analysis/Interval.cpp @@ -0,0 +1,58 @@ +//===- Interval.cpp - Interval class code ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the Interval class, which represents a +// partition of a control flow graph of some kind. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Interval.h" +#include "llvm/BasicBlock.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Interval Implementation +//===----------------------------------------------------------------------===// + +// isLoop - Find out if there is a back edge in this interval... +// +bool Interval::isLoop() const { + // There is a loop in this interval iff one of the predecessors of the header + // node lives in the interval. + for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); + I != E; ++I) + if (contains(*I)) + return true; + return false; +} + + +void Interval::print(raw_ostream &OS) const { + OS << "-------------------------------------------------------------\n" + << "Interval Contents:\n"; + + // Print out all of the basic blocks in the interval... 
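+  // (Streaming **I sends the whole BasicBlock to the stream, so each entry
+  // below prints the block's label and body, not just its name.)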
+  for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+         E = Nodes.end(); I != E; ++I)
+    OS << **I << "\n";
+
+  OS << "Interval Predecessors:\n";
+  for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+         E = Predecessors.end(); I != E; ++I)
+    OS << **I << "\n";
+
+  OS << "Interval Successors:\n";
+  for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+         E = Successors.end(); I != E; ++I)
+    OS << **I << "\n";
+}
diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 0000000..1c9e148
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+                "Interval Partition Construction", true, true);
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    delete Intervals[i];
+  IntervalMap.clear();
+  Intervals.clear();
+  RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+  Intervals.push_back(I);
+
+  // Add mappings for all of the basic blocks in I to the IntervalPartition
+  for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+       It != End; ++It)
+    IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures.  After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+  BasicBlock *Header = Int->getHeaderNode();
+  for (Interval::succ_iterator I = Int->Successors.begin(),
+         E = Int->Successors.end(); I != E; ++I)
+    getBlockInterval(*I)->Predecessors.push_back(Header);
+}
+
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+  // Pass false to intervals_begin because we take ownership of its memory
+  function_interval_iterator I = intervals_begin(&F, false);
+  assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+  addIntervalToPartition(RootInterval = *I);
+
+  ++I;  // After the first one...
+
+  // Add the rest of the intervals to the partition.
+  for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+    addIntervalToPartition(*I);
+
+  // Now that we know all of the successor information, propagate this to the
+  // predecessors for each block.
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    updatePredecessors(Intervals[i]);
+  return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph.  This takes an additional boolean parameter to
+// distinguish it from a copy constructor.  Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+  : FunctionPass(ID) {
+  assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+  // Pass false to intervals_begin because we take ownership of its memory
+  interval_part_interval_iterator I = intervals_begin(IP, false);
+  assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+  addIntervalToPartition(RootInterval = *I);
+
+  ++I;  // After the first one...
+
+  // Add the rest of the intervals to the partition.
+  for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+    addIntervalToPartition(*I);
+
+  // Now that we know all of the successor information, propagate this to the
+  // predecessors for each block.
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    updatePredecessors(Intervals[i]);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 0000000..e32dbc4
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,941 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
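+//
+// A typical client is the jump threading pass, which asks questions such as
+// "does %x hold a known constant, or a known-nonnull pointer, along this
+// particular CFG edge?" and only pays for the blocks each query touches.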
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +char LazyValueInfo::ID = 0; +INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true); + +namespace llvm { + FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } +} + + +//===----------------------------------------------------------------------===// +// LVILatticeVal +//===----------------------------------------------------------------------===// + +/// LVILatticeVal - This is the information tracked by LazyValueInfo for each +/// value. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. +/// +namespace { +class LVILatticeVal { + enum LatticeValueTy { + /// undefined - This LLVM Value has no known value yet. + undefined, + + /// constant - This LLVM Value has a specific constant value. + constant, + /// notconstant - This LLVM value is known to not have the specified value. + notconstant, + + /// constantrange + constantrange, + + /// overdefined - This instruction is not known to be constant, and we know + /// it has a value. + overdefined + }; + + /// Val: This stores the current lattice value along with the Constant* for + /// the constant if this is a 'constant' or 'notconstant' value. + LatticeValueTy Tag; + Constant *Val; + ConstantRange Range; + +public: + LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + + static LVILatticeVal get(Constant *C) { + LVILatticeVal Res; + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1)); + else if (!isa<UndefValue>(C)) + Res.markConstant(C); + return Res; + } + static LVILatticeVal getNot(Constant *C) { + LVILatticeVal Res; + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) + Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + else + Res.markNotConstant(C); + return Res; + } + static LVILatticeVal getRange(ConstantRange CR) { + LVILatticeVal Res; + Res.markConstantRange(CR); + return Res; + } + + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return Val; + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return Val; + } + + ConstantRange getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; + } + + /// markOverdefined - Return true if this is a change in status. + bool markOverdefined() { + if (isOverdefined()) + return false; + Tag = overdefined; + return true; + } + + /// markConstant - Return true if this is a change in status. 
+  bool markConstant(Constant *V) {
+    if (isConstant()) {
+      assert(getConstant() == V && "Marking constant with different value");
+      return false;
+    }
+
+    assert(isUndefined());
+    Tag = constant;
+    assert(V && "Marking constant with NULL");
+    Val = V;
+    return true;
+  }
+
+  /// markNotConstant - Return true if this is a change in status.
+  bool markNotConstant(Constant *V) {
+    if (isNotConstant()) {
+      assert(getNotConstant() == V && "Marking !constant with different value");
+      return false;
+    }
+
+    if (isConstant())
+      assert(getConstant() != V && "Marking not constant with different value");
+    else
+      assert(isUndefined());
+
+    Tag = notconstant;
+    assert(V && "Marking notconstant with NULL");
+    Val = V;
+    return true;
+  }
+
+  /// markConstantRange - Return true if this is a change in status.
+  bool markConstantRange(const ConstantRange NewR) {
+    if (isConstantRange()) {
+      if (NewR.isEmptySet())
+        return markOverdefined();
+
+      bool changed = Range != NewR;
+      Range = NewR;
+      return changed;
+    }
+
+    assert(isUndefined());
+    if (NewR.isEmptySet())
+      return markOverdefined();
+
+    Tag = constantrange;
+    Range = NewR;
+    return true;
+  }
+
+  /// mergeIn - Merge the specified lattice value into this one, updating this
+  /// one and returning true if anything changed.
+  bool mergeIn(const LVILatticeVal &RHS) {
+    if (RHS.isUndefined() || isOverdefined()) return false;
+    if (RHS.isOverdefined()) return markOverdefined();
+
+    if (RHS.isNotConstant()) {
+      if (isNotConstant()) {
+        if (getNotConstant() != RHS.getNotConstant() ||
+            isa<ConstantExpr>(getNotConstant()) ||
+            isa<ConstantExpr>(RHS.getNotConstant()))
+          return markOverdefined();
+        return false;
+      } else if (isConstant()) {
+        if (getConstant() == RHS.getNotConstant() ||
+            isa<ConstantExpr>(RHS.getNotConstant()) ||
+            isa<ConstantExpr>(getConstant()))
+          return markOverdefined();
+        return markNotConstant(RHS.getNotConstant());
+      } else if (isConstantRange()) {
+        return markOverdefined();
+      }
+
+      assert(isUndefined() && "Unexpected lattice");
+      return markNotConstant(RHS.getNotConstant());
+    }
+
+    if (RHS.isConstantRange()) {
+      if (isConstantRange()) {
+        ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+        if (NewR.isFullSet())
+          return markOverdefined();
+        else
+          return markConstantRange(NewR);
+      } else if (!isUndefined()) {
+        return markOverdefined();
+      }
+
+      assert(isUndefined() && "Unexpected lattice");
+      return markConstantRange(RHS.getConstantRange());
+    }
+
+    // RHS must be a constant; we must be undef, constant, or notconstant.
+    assert(!isConstantRange() &&
+           "Constant and ConstantRange cannot be merged.");
+
+    if (isUndefined())
+      return markConstant(RHS.getConstant());
+
+    if (isConstant()) {
+      if (getConstant() != RHS.getConstant())
+        return markOverdefined();
+      return false;
+    }
+
+    // If we are known "!=4" and RHS is "==5", stay at "!=4".
+    if (getNotConstant() == RHS.getConstant() ||
+        isa<ConstantExpr>(getNotConstant()) ||
+        isa<ConstantExpr>(RHS.getConstant()))
+      return markOverdefined();
+    return false;
+  }
+
+};
+
+} // end anonymous namespace.
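+
+// For illustration, a walk through the merge rules above: starting from
+// 'undefined', mergeIn of constant<@g> yields constant<@g>, and a later
+// mergeIn of a different constant falls to 'overdefined'.  Integer constants
+// travel as ranges instead: get(i32 5) is the singleton range [5, 6), and
+// merging it with [10, 11) produces the covering range [5, 11) via
+// ConstantRange::unionWith rather than losing all information.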
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+  if (Val.isUndefined())
+    return OS << "undefined";
+  if (Val.isOverdefined())
+    return OS << "overdefined";
+
+  if (Val.isNotConstant())
+    return OS << "notconstant<" << *Val.getNotConstant() << '>';
+  else if (Val.isConstantRange())
+    return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+              << Val.getConstantRange().getUpper() << '>';
+  return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+  /// preserves computed lattice values across the clients' queries.
+  class LazyValueInfoCache {
+  public:
+    /// BlockCacheEntryTy - This is a computed lattice value at the end of the
+    /// specified basic block for a Value* that depends on context.
+    typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy;
+
+    /// ValueCacheEntryTy - This is all of the cached block information for
+    /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+    /// entries, allowing us to do a lookup with a binary search.
+    typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+  private:
+    /// LVIValueHandle - A callback value handle that updates the cache when
+    /// values are erased.
+    struct LVIValueHandle : public CallbackVH {
+      LazyValueInfoCache *Parent;
+
+      LVIValueHandle(Value *V, LazyValueInfoCache *P)
+        : CallbackVH(V), Parent(P) { }
+
+      void deleted();
+      void allUsesReplacedWith(Value* V) {
+        deleted();
+      }
+
+      LVIValueHandle &operator=(Value *V) {
+        return *this = LVIValueHandle(V, Parent);
+      }
+    };
+
+    /// ValueCache - This is all of the cached information for all values,
+    /// mapped from Value* to key information.
+    std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+    /// OverDefinedCache - This tracks, on a per-block basis, the set of
+    /// values that are over-defined at the end of that block. This is required
+    /// for cache updating.
+    std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache;
+
+  public:
+
+    /// getValueInBlock - This is the query interface to determine the lattice
+    /// value for the specified Value* at the end of the specified block.
+    LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+    /// getValueOnEdge - This is the query interface to determine the lattice
+    /// value for the specified Value* that is true on the specified edge.
+    LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+    /// threadEdge - This is the update interface to inform the cache that an
+    /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+    /// NewSucc.
+    void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+    /// eraseBlock - This is part of the update interface to inform the cache
+    /// that a block has been deleted.
+    void eraseBlock(BasicBlock *BB);
+
+    /// clear - Empty the cache.
+    void clear() {
+      ValueCache.clear();
+      OverDefinedCache.clear();
+    }
+  };
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// LVIQuery Impl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LVIQuery - This is a transient object that exists while a query is
+  /// being performed.
+  ///
+  /// TODO: Reuse LVIQuery instead of recreating it for every query, this avoids
+  /// reallocation of the densemap on every query.
+  class LVIQuery {
+    typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy;
+    typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy;
+
+    /// This is the current value being queried for.
+    Value *Val;
+
+    /// This is a pointer to the owning cache, for recursive queries.
+    LazyValueInfoCache &Parent;
+
+    /// This is all of the cached information about this value.
+    ValueCacheEntryTy &Cache;
+
+    /// This tracks, for each block, what values are overdefined.
+    std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache;
+
+    /// NewBlockInfo - The set of basic blocks for which this query created new
+    /// cache entries; the destructor uses it to populate OverDefinedCache.
+    DenseSet<BasicBlock*> NewBlockInfo;
+
+  public:
+
+    LVIQuery(Value *V, LazyValueInfoCache &P,
+             ValueCacheEntryTy &VC,
+             std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC)
+      : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) {
+    }
+
+    ~LVIQuery() {
+      // When the query is done, record which of the newly computed block
+      // entries ended up overdefined, so that later cache updates can find
+      // and invalidate them.
+      if (NewBlockInfo.empty()) return;
+
+      for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(),
+           E = NewBlockInfo.end(); I != E; ++I) {
+        if (Cache[*I].isOverdefined())
+          OverDefinedCache.insert(std::make_pair(*I, Val));
+      }
+    }
+
+    LVILatticeVal getBlockValue(BasicBlock *BB);
+    LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
+
+  private:
+    LVILatticeVal getCachedEntryForBlock(BasicBlock *BB);
+  };
+} // end anonymous namespace
+
+void LazyValueInfoCache::LVIValueHandle::deleted() {
+  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+       I = Parent->OverDefinedCache.begin(),
+       E = Parent->OverDefinedCache.end();
+       I != E; ) {
+    std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+    ++I;
+    if (tmp->second == getValPtr())
+      Parent->OverDefinedCache.erase(tmp);
+  }
+
+  // This erasure deallocates *this, so it MUST happen after we're done
+  // using any and all members of *this.
+  Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+       I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) {
+    std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+    ++I;
+    if (tmp->first == BB)
+      OverDefinedCache.erase(tmp);
+  }
+
+  for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+       I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+    I->second.erase(BB);
+}
+
+/// getCachedEntryForBlock - See if we already have a value for this block. If
+/// so, return it, otherwise create a new entry in the Cache map to use.
+LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
+  NewBlockInfo.insert(BB);
+  return Cache[BB];
+}
+
+LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
+  // See if we already have a value for this block.
+ LVILatticeVal BBLV = getCachedEntryForBlock(BB); + + // If we've already computed this block's value, return it. + if (!BBLV.isUndefined()) { + DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); + return BBLV; + } + + // Otherwise, this is the first time we're seeing this block. Reset the + // lattice value to overdefined, so that cycles will terminate and be + // conservatively correct. + BBLV.markOverdefined(); + Cache[BB] = BBLV; + + Instruction *BBI = dyn_cast<Instruction>(Val); + if (BBI == 0 || BBI->getParent() != BB) { + LVILatticeVal Result; // Start Undefined. + + // If this is a pointer, and there's a load from that pointer in this BB, + // then we know that the pointer can't be NULL. + bool NotNull = false; + if (Val->getType()->isPointerTy()) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ + LoadInst *L = dyn_cast<LoadInst>(BI); + if (L && L->getPointerAddressSpace() == 0 && + L->getPointerOperand()->getUnderlyingObject() == + Val->getUnderlyingObject()) { + NotNull = true; + break; + } + } + } + + unsigned NumPreds = 0; + // Loop over all of our predecessors, merging what we know from them into + // result. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + Result.mergeIn(getEdgeValue(*PI, BB)); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + // If we previously determined that this is a pointer that can't be null + // then return that rather than giving up entirely. + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } + + return Result; + } + ++NumPreds; + } + + + // If this is the entry block, we must be asking about an argument. The + // value is overdefined. + if (NumPreds == 0 && BB == &BB->getParent()->front()) { + assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); + Result.markOverdefined(); + return Result; + } + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined()); + return Cache[BB] = Result; + } + + // If this value is defined by an instruction in this block, we have to + // process it here somehow or return overdefined. + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + LVILatticeVal Result; // Start Undefined. + + // Loop over all of our predecessors, merging what we know from them into + // result. + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + Value* PhiVal = PN->getIncomingValueForBlock(*PI); + Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB)); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + return Result; + } + } + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined()); + return Cache[BB] = Result; + } + + assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?"); + + // We can only analyze the definitions of certain classes of instructions + // (integral binops and casts at the moment), so bail if this isn't one. 
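+  // (For instance, if the LHS is known to lie in [0, 10) and BBI is
+  // "add i32 %lhs, 1", the ConstantRange arithmetic below yields [1, 11).)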
+  LVILatticeVal Result;
+  if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+      !BBI->getType()->isIntegerTy()) {
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+    Result.markOverdefined();
+    return Result;
+  }
+
+  // FIXME: We're currently limited to binops with a constant RHS. This should
+  // be improved.
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+  if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+
+    Result.markOverdefined();
+    return Result;
+  }
+
+  // Figure out the range of the LHS. If that fails, bail.
+  LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
+  if (!LHSVal.isConstantRange()) {
+    Result.markOverdefined();
+    return Result;
+  }
+
+  ConstantInt *RHS = 0;
+  ConstantRange LHSRange = LHSVal.getConstantRange();
+  ConstantRange RHSRange(1);
+  const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+  if (isa<BinaryOperator>(BBI)) {
+    RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
+    if (!RHS) {
+      Result.markOverdefined();
+      return Result;
+    }
+
+    RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
+  }
+
+  // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+  // more definitions.
+  switch (BBI->getOpcode()) {
+  case Instruction::Add:
+    Result.markConstantRange(LHSRange.add(RHSRange));
+    break;
+  case Instruction::Sub:
+    Result.markConstantRange(LHSRange.sub(RHSRange));
+    break;
+  case Instruction::Mul:
+    Result.markConstantRange(LHSRange.multiply(RHSRange));
+    break;
+  case Instruction::UDiv:
+    Result.markConstantRange(LHSRange.udiv(RHSRange));
+    break;
+  case Instruction::Shl:
+    Result.markConstantRange(LHSRange.shl(RHSRange));
+    break;
+  case Instruction::LShr:
+    Result.markConstantRange(LHSRange.lshr(RHSRange));
+    break;
+  case Instruction::Trunc:
+    Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+    break;
+  case Instruction::SExt:
+    Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+    break;
+  case Instruction::ZExt:
+    Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+    break;
+  case Instruction::BitCast:
+    Result.markConstantRange(LHSRange);
+    break;
+
+  // Unhandled instructions are overdefined.
+  default:
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+    Result.markOverdefined();
+    break;
+  }
+
+  return Cache[BB] = Result;
+}
+
+
+/// getEdgeValue - This method attempts to infer more complex information
+/// about Val on the edge from BBFrom to BBTo than is known at the end of
+/// BBFrom, by taking the edge's branch or switch condition into account.
+LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
+  // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+  // know that v != 0.
+  if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+    // If this is a conditional branch and only one successor goes to BBTo, then
+    // we may be able to infer something from the condition.
+    if (BI->isConditional() &&
+        BI->getSuccessor(0) != BI->getSuccessor(1)) {
+      bool isTrueDest = BI->getSuccessor(0) == BBTo;
+      assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+             "BBTo isn't a successor of BBFrom");
+
+      // If V is the condition of the branch itself, then we know exactly what
+      // it is.
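+      // (For example, on the edge where "br i1 %cond, label %t, label %f"
+      // branches to %t, %cond is simply the constant true.)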
+ if (BI->getCondition() == Val) + return LVILatticeVal::get(ConstantInt::get( + Type::getInt1Ty(Val->getContext()), isTrueDest)); + + // If the condition of the branch is an equality comparison, we may be + // able to infer the value. + ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); + if (ICI && ICI->getOperand(0) == Val && + isa<Constant>(ICI->getOperand(1))) { + if (ICI->isEquality()) { + // We know that V has the RHS constant if this is a true SETEQ or + // false SETNE. + if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) + return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) { + // Calculate the range of values that would satisfy the comparison. + ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); + ConstantRange TrueValues = + ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + + // If we're interested in the false dest, invert the condition. + if (!isTrueDest) TrueValues = TrueValues.inverse(); + + // Figure out the possible values of the query BEFORE this branch. + LVILatticeVal InBlock = getBlockValue(BBFrom); + if (!InBlock.isConstantRange()) + return LVILatticeVal::getRange(TrueValues); + + // Find all potential values that satisfy both the input and output + // conditions. + ConstantRange PossibleValues = + TrueValues.intersectWith(InBlock.getConstantRange()); + + return LVILatticeVal::getRange(PossibleValues); + } + } + } + } + + // If the edge was formed by a switch on the value, then we may know exactly + // what it is. + if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { + if (SI->getCondition() == Val) { + // We don't know anything in the default case. + if (SI->getDefaultDest() == BBTo) { + LVILatticeVal Result; + Result.markOverdefined(); + return Result; + } + + // We only know something if there is exactly one value that goes from + // BBFrom to BBTo. + unsigned NumEdges = 0; + ConstantInt *EdgeVal = 0; + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + if (SI->getSuccessor(i) != BBTo) continue; + if (NumEdges++) break; + EdgeVal = SI->getCaseValue(i); + } + assert(EdgeVal && "Missing successor?"); + if (NumEdges == 1) + return LVILatticeVal::get(EdgeVal); + } + } + + // Otherwise see if the value is known in the block. + return getBlockValue(BBFrom); +} + + +//===----------------------------------------------------------------------===// +// LazyValueInfoCache Impl +//===----------------------------------------------------------------------===// + +LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(V)) + return LVILatticeVal::get(VC); + + DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" + << BB->getName() << "'\n"); + + LVILatticeVal Result = LVIQuery(V, *this, + ValueCache[LVIValueHandle(V, this)], + OverDefinedCache).getBlockValue(BB); + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +LVILatticeVal LazyValueInfoCache:: +getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { + // If already a constant, there is nothing to compute. 
+  if (Constant *VC = dyn_cast<Constant>(V))
+    return LVILatticeVal::get(VC);
+
+  DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+        << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+  LVILatticeVal Result =
+    LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)],
+             OverDefinedCache).getEdgeValue(FromBB, ToBB);
+
+  DEBUG(dbgs() << " Result = " << Result << "\n");
+
+  return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                                    BasicBlock *NewSucc) {
+  // When an edge in the graph has been threaded, values that we could not
+  // determine before (i.e. that were marked overdefined) may be possible to
+  // solve now.  We do NOT try to proactively update these values.  Instead,
+  // we clear their entries from the cache, and allow lazy updating to recompute
+  // them when needed.
+
+  // The updating process is fairly simple: we need to drop cached info
+  // for all values that were marked overdefined in OldSucc, and for those same
+  // values in any successor of OldSucc (except NewSucc) in which they were
+  // also marked overdefined.
+  std::vector<BasicBlock*> worklist;
+  worklist.push_back(OldSucc);
+
+  DenseSet<Value*> ClearSet;
+  for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+       I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) {
+    if (I->first == OldSucc)
+      ClearSet.insert(I->second);
+  }
+
+  // Use a worklist to perform a depth-first search of OldSucc's successors.
+  // NOTE: We do not need a visited list since any blocks we have already
+  // visited will have had their overdefined markers cleared already, and we
+  // thus won't loop to their successors.
+  while (!worklist.empty()) {
+    BasicBlock *ToUpdate = worklist.back();
+    worklist.pop_back();
+
+    // Skip blocks only accessible through NewSucc.
+    if (ToUpdate == NewSucc) continue;
+
+    bool changed = false;
+    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+         I != E; ++I) {
+      // If a value was marked overdefined in OldSucc, and is here too...
+      std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI =
+        OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+      if (OI == OverDefinedCache.end()) continue;
+
+      // Remove it from the caches.
+      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+      assert(CI != Entry.end() && "Couldn't find entry to update?");
+      Entry.erase(CI);
+      OverDefinedCache.erase(OI);
+
+      // If we removed anything, then we potentially need to update the
+      // block's successors too.
+      changed = true;
+    }
+
+    if (!changed) continue;
+
+    worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+  if (!PImpl)
+    PImpl = new LazyValueInfoCache();
+  return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+  if (PImpl)
+    getCache(PImpl).clear();
+
+  TD = getAnalysisIfAvailable<TargetData>();
+  // Fully lazy.
+  return false;
+}
+
+void LazyValueInfo::releaseMemory() {
+  // If the cache was allocated, free it.
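+  // (PImpl is deliberately an opaque void* in the header so that
+  // LazyValueInfoCache can stay private to this file; getCache() above
+  // performs the lazily-constructing cast.)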
+ if (PImpl) { + delete &getCache(PImpl); + PImpl = 0; + } +} + +Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { + LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB); + + if (Result.isConstant()) + return Result.getConstant(); + else if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getConstantOnEdge - Determine whether the specified value is known to be a +/// constant on the specified edge. Return null if not. +Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + if (Result.isConstant()) + return Result.getConstant(); + else if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getPredicateOnEdge - Determine whether the specified value comparison +/// with a constant is known to be true or false on the specified CFG edge. +/// Pred is a CmpInst predicate. +LazyValueInfo::Tristate +LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + // If we know the value is a constant, evaluate the conditional. + Constant *Res = 0; + if (Result.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res)) + return ResCI->isZero() ? False : True; + return Unknown; + } + + if (Result.isConstantRange()) { + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return Unknown; + + ConstantRange CR = Result.getConstantRange(); + if (Pred == ICmpInst::ICMP_EQ) { + if (!CR.contains(CI->getValue())) + return False; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return True; + } else if (Pred == ICmpInst::ICMP_NE) { + if (!CR.contains(CI->getValue())) + return True; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return False; + } + + // Handle more complex predicates. + ConstantRange RHS(CI->getValue(), CI->getValue()+1); + ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS); + if (CR.intersectWith(TrueValues).isEmptySet()) + return False; + else if (TrueValues.contains(CR)) + return True; + + return Unknown; + } + + if (Result.isNotConstant()) { + // If this is an equality comparison, we can try to fold it knowing that + // "V != C1". + if (Pred == ICmpInst::ICMP_EQ) { + // !C1 == C -> false iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return False; + } else if (Pred == ICmpInst::ICMP_NE) { + // !C1 != C -> true iff C1 == C. 
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return True; + } + return Unknown; + } + + return Unknown; +} + +void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock* NewSucc) { + if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); +} + +void LazyValueInfo::eraseBlock(BasicBlock *BB) { + if (PImpl) getCache(PImpl).eraseBlock(BB); +} diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp new file mode 100644 index 0000000..7f51202 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp @@ -0,0 +1,137 @@ +//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LibCallAliasAnalysis class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallAliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +using namespace llvm; + +// Register this pass... +char LibCallAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", + "LibCall Alias Analysis", false, true, false); + +FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { + return new LibCallAliasAnalysis(LCI); +} + +LibCallAliasAnalysis::~LibCallAliasAnalysis() { + delete LCI; +} + +void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code +} + + + +/// AnalyzeLibCallDetails - Given a call to a function with the specified +/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call +/// vs the specified pointer/size. +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, + ImmutableCallSite CS, const Value *P, + unsigned Size) { + // If we have a function, check to see what kind of mod/ref effects it + // has. Start by including any info globally known about the function. + AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; + if (MRInfo == NoModRef) return MRInfo; + + // If that didn't tell us that the function is 'readnone', check to see + // if we have detailed info and if 'P' is any of the locations we know + // about. + const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; + if (Details == 0) + return MRInfo; + + // If the details array is of the 'DoesNot' kind, we only know something if + // the pointer is a match for one of the locations in 'Details'. If we find a + // match, we can prove some interactions cannot happen. + // + if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { + // Find out if the pointer refers to a known location. + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &Loc = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size); + if (Res != LibCallLocationInfo::Yes) continue; + + // If we find a match against a location that we 'do not' interact with, + // learn this info into MRInfo. 
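+      // (For example, if the entry records that this call does not Mod the
+      // matched location, the mask below strips Mod and leaves at most Ref.)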
+      return ModRefResult(MRInfo & ~Details[i].MRInfo);
+    }
+    return MRInfo;
+  }
+
+  // If the details are of the 'DoesOnly' sort, we know something if the
+  // pointer is a match for one of the locations in 'Details'. Also, if we can
+  // prove that the pointer is *not* one of the locations in 'Details', we know
+  // that the call is NoModRef.
+  assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+  // Find out if the pointer refers to a known location.
+  bool NoneMatch = true;
+  for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+    const LibCallLocationInfo &Loc =
+      LCI->getLocationInfo(Details[i].LocationID);
+    LibCallLocationInfo::LocResult Res = Loc.isLocation(CS, P, Size);
+    if (Res == LibCallLocationInfo::No) continue;
+
+    // If we don't know if this pointer points to the location, then we have to
+    // assume it might alias in some case.
+    if (Res == LibCallLocationInfo::Unknown) {
+      NoneMatch = false;
+      continue;
+    }
+
+    // If we know that this pointer definitely is pointing into the location,
+    // merge in this information.
+    return ModRefResult(MRInfo & Details[i].MRInfo);
+  }
+
+  // If we found that the pointer is guaranteed to not match any of the
+  // locations in our 'DoesOnly' rule, then we know that the pointer must point
+  // to some other location. Since the libcall doesn't mod/ref any other
+  // locations, return NoModRef.
+  if (NoneMatch)
+    return NoModRef;
+
+  // Otherwise, return any other info gained so far.
+  return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                                    const Value *P, unsigned Size) {
+  ModRefResult MRInfo = ModRef;
+
+  // If this is a direct call to a function that LCI knows about, get the
+  // information about the runtime function.
+  if (LCI) {
+    if (const Function *F = CS.getCalledFunction()) {
+      if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+        MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
+        if (MRInfo == NoModRef) return NoModRef;
+      }
+    }
+  }
+
+  // The AliasAnalysis base class has some smarts, let's use them. Intersect
+  // (rather than union) the two results so refinements from LCI are kept.
+  return (ModRefResult)(MRInfo & AliasAnalysis::getModRefInfo(CS, P, Size));
+}
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 0000000..81b0f46
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// helper does the cast.
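+/// (Impl is kept as a void* so the public header need not include StringMap;
+/// this helper is the single place that recovers the real type.)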
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) { + return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr); +} + +LibCallInfo::~LibCallInfo() { + delete getMap(Impl); +} + +const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { + // Get location info on the first call. + if (NumLocations == 0) + NumLocations = getLocationInfo(Locations); + + assert(LocID < NumLocations && "Invalid location ID!"); + return Locations[LocID]; +} + + +/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to +/// the specified function if we have it. If not, return null. +const LibCallFunctionInfo * +LibCallInfo::getFunctionInfo(const Function *F) const { + StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); + + /// If this is the first time we are querying for this info, lazily construct + /// the StringMap to index it. + if (Map == 0) { + Impl = Map = new StringMap<const LibCallFunctionInfo*>(); + + const LibCallFunctionInfo *Array = getFunctionInfoArray(); + if (Array == 0) return 0; + + // We now have the array of entries. Populate the StringMap. + for (unsigned i = 0; Array[i].Name; ++i) + (*Map)[Array[i].Name] = Array+i; + } + + // Look up this function in the string map. + return Map->lookup(F->getName()); +} + diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp new file mode 100644 index 0000000..a9d9724 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -0,0 +1,662 @@ +//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass statically checks for common and easily-identified constructs +// which produce undefined or likely unintended behavior in LLVM IR. +// +// It is not a guarantee of correctness, in two ways. First, it isn't +// comprehensive. There are checks which could be done statically which are +// not yet implemented. Some of these are indicated by TODO comments, but +// those aren't comprehensive either. Second, many conditions cannot be +// checked statically. This pass does no dynamic instrumentation, so it +// can't check for all possible problems. +// +// Another limitation is that it assumes all code will be executed. A store +// through a null pointer in a basic block which is never reached is harmless, +// but this pass will warn about it anyway. This is the main reason why most +// of these checks live here instead of in the Verifier pass. +// +// Optimization passes may make conditions that this pass checks for more or +// less obvious. If an optimization pass appears to be introducing a warning, +// it may be that the optimization pass is merely exposing an existing +// condition in the code. +// +// This code may be run before instcombine. In many cases, instcombine checks +// for the same kinds of things and turns instructions with undefined behavior +// into unreachable (or equivalent). Because of this, this pass makes some +// effort to look through bitcasts and so on. 
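+//
+// For example, given IR such as:
+//   store i32 0, i32* null
+// the verifier is satisfied, but this pass reports
+// "Undefined behavior: Null pointer dereference".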
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +namespace { + namespace MemRef { + static unsigned Read = 1; + static unsigned Write = 2; + static unsigned Callee = 4; + static unsigned Branchee = 8; + } + + class Lint : public FunctionPass, public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallSite(CallSite CS); + void visitMemoryReference(Instruction &I, Value *Ptr, + unsigned Size, unsigned Align, + const Type *Ty, unsigned Flags); + + void visitCallInst(CallInst &I); + void visitInvokeInst(InvokeInst &I); + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const; + + public: + Module *Mod; + AliasAnalysis *AA; + DominatorTree *DT; + TargetData *TD; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(ID), MessagesStr(Messages) {} + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + void WriteValue(const Value *V) { + if (!V) return; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + WriteAsOperand(MessagesStr, V, true, Mod); + MessagesStr << '\n'; + } + } + + void WriteType(const Type *T) { + if (!T) return; + MessagesStr << ' '; + WriteTypeSymbolic(MessagesStr, T, Mod); + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. This provides a nice place to put a breakpoint if you want + // to see why something is not correct. 
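+    // Messages accumulate in MessagesStr and are flushed to dbgs() at the
+    // end of runOnFunction, so one run can report multiple problems.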
+ void CheckFailed(const Twine &Message, + const Value *V1 = 0, const Value *V2 = 0, + const Value *V3 = 0, const Value *V4 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + } + + void CheckFailed(const Twine &Message, const Value *V1, + const Type *T2, const Value *V3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteType(T2); + WriteValue(V3); + } + + void CheckFailed(const Twine &Message, const Type *T1, + const Type *T2 = 0, const Type *T3 = 0) { + MessagesStr << Message.str() << "\n"; + WriteType(T1); + WriteType(T2); + WriteType(T3); + } + }; +} + +char Lint::ID = 0; +INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true); + +// Assert - We know that cond should be true, if not print an error message. +#define Assert(C, M) \ + do { if (!(C)) { CheckFailed(M); return; } } while (0) +#define Assert1(C, M, V1) \ + do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) +#define Assert2(C, M, V1, V2) \ + do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) +#define Assert3(C, M, V1, V2, V3) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) +#define Assert4(C, M, V1, V2, V3, V4) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) + +// Lint::run - This is the main Analysis entry point for a +// function. +// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + visit(F); + dbgs() << MessagesStr.str(); + Messages.clear(); + return false; +} + +void Lint::visitFunction(Function &F) { + // This isn't undefined behavior, it's just a little unusual, and it's a + // fairly common mistake to neglect to name a function. + Assert1(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); + + // TODO: Check for irreducible control flow. +} + +void Lint::visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + Value *Callee = CS.getCalledValue(); + + visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee); + + if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { + Assert1(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); + + const FunctionType *FT = F->getFunctionType(); + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + + Assert1(FT->isVarArg() ? + FT->getNumParams() <= NumActualArgs : + FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", &I); + + Assert1(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", &I); + + // Check argument types (in case the callee was casted) and attributes. + // TODO: Verify that caller and callee attributes are compatible. + Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + for (; AI != AE; ++AI) { + Value *Actual = *AI; + if (PI != PE) { + Argument *Formal = PI++; + Assert1(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", &I); + + // Check that noalias arguments don't alias other arguments. The + // AliasAnalysis API isn't expressive enough for what we really want + // to do. 
Known partial overlap is not distinguished from the case + // where nothing is known. + if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { + Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias, + "Unusual: noalias argument aliases another argument", &I); + } + + // Check that an sret argument points to valid memory. + if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { + const Type *Ty = + cast<PointerType>(Formal->getType())->getElementType(); + visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), + TD ? TD->getABITypeAlignment(Ty) : 0, + Ty, MemRef::Read | MemRef::Write); + } + } + } + } + + if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + Value *Obj = findValue(*AI, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", &I); + } + + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) + switch (II->getIntrinsicID()) { + default: break; + + // TODO: Check more intrinsics + + case Intrinsic::memcpy: { + MemCpyInst *MCI = cast<MemCpyInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0, + MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. + unsigned Size = 0; + if (const ConstantInt *Len = + dyn_cast<ConstantInt>(findValue(MCI->getLength(), + /*OffsetOk=*/false))) + if (Len->getValue().isIntN(32)) + Size = Len->getValue().getZExtValue(); + Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } + case Intrinsic::memmove: { + MemMoveInst *MMI = cast<MemMoveInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0, + MemRef::Read); + break; + } + case Intrinsic::memset: { + MemSetInst *MSI = cast<MemSetInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0, + MemRef::Write); + break; + } + + case Intrinsic::vastart: + Assert1(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); + + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, + MemRef::Read | MemRef::Write); + break; + case Intrinsic::vacopy: + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read); + break; + case Intrinsic::vaend: + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, + MemRef::Read | MemRef::Write); + break; + + case Intrinsic::stackrestore: + // Stackrestore doesn't read or write memory, but it sets the + // stack pointer, which the compiler may read from or write to + // at any time, so check it for both readability and writeability. 
+ visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, + MemRef::Read | MemRef::Write); + break; + } +} + +void Lint::visitCallInst(CallInst &I) { + return visitCallSite(&I); +} + +void Lint::visitInvokeInst(InvokeInst &I) { + return visitCallSite(&I); +} + +void Lint::visitReturnInst(ReturnInst &I) { + Function *F = I.getParent()->getParent(); + Assert1(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", + &I); + + if (Value *V = I.getReturnValue()) { + Value *Obj = findValue(V, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Unusual: Returning alloca value", &I); + } +} + +// TODO: Check that the reference is in bounds. +// TODO: Check readnone/readonly function attributes. +void Lint::visitMemoryReference(Instruction &I, + Value *Ptr, unsigned Size, unsigned Align, + const Type *Ty, unsigned Flags) { + // If no memory is being referenced, it doesn't matter if the pointer + // is valid. + if (Size == 0) + return; + + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); + Assert1(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert1(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); + + if (Flags & MemRef::Write) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) + Assert1(!GV->isConstant(), + "Undefined behavior: Write to read-only memory", &I); + Assert1(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); + } + if (Flags & MemRef::Read) { + Assert1(!isa<Function>(UnderlyingObject), + "Unusual: Load from function body", &I); + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); + } + if (Flags & MemRef::Callee) { + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); + } + if (Flags & MemRef::Branchee) { + Assert1(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); + } + + if (TD) { + if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + + if (Align != 0) { + unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + "Undefined behavior: Memory reference address is misaligned", &I); + } + } +} + +void Lint::visitLoadInst(LoadInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getType()), I.getAlignment(), + I.getType(), MemRef::Read); +} + +void Lint::visitStoreInst(StoreInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getOperand(0)->getType()), + I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitXor(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); +} + +void Lint::visitSub(BinaryOperator &I) { + 
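+  // As with xor above: e.g. "%d = sub i32 undef, undef" has an arbitrary
+  // result, so flag it.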
Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); +} + +void Lint::visitLShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitAShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitShl(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool isZero(Value *V, TargetData *TD) { + // Assume undef could be zero. + if (isa<UndefValue>(V)) return true; + + unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + return KnownZero.isAllOnesValue(); +} + +void Lint::visitSDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitUDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitSRem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitURem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitAllocaInst(AllocaInst &I) { + if (isa<ConstantInt>(I.getArraySize())) + // This isn't undefined behavior, it's just an obvious pessimization. + Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); + + // TODO: Check for an unusual size (MSB set?) +} + +void Lint::visitVAArgInst(VAArgInst &I) { + visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0, + MemRef::Read | MemRef::Write); +} + +void Lint::visitIndirectBrInst(IndirectBrInst &I) { + visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee); + + Assert1(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); +} + +void Lint::visitExtractElementInst(ExtractElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); +} + +void Lint::visitInsertElementInst(InsertElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(2), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); +} + +void Lint::visitUnreachableInst(UnreachableInst &I) { + // This isn't undefined behavior, it's merely suspicious. 
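+  // For example, an 'add' immediately before 'unreachable' is likely the
+  // residue of a miscompile or of hand-mangled IR.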
+ Assert1(&I == I.getParent()->begin() || + prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", &I); +} + +/// findValue - Look through bitcasts and simple memory reference patterns +/// to identify an equivalent, but more informative, value. If OffsetOk +/// is true, look through getelementptrs with non-zero offsets too. +/// +/// Most analysis passes don't require this logic, because instcombine +/// will simplify most of these kinds of things away. But it's a goal of +/// this Lint pass to be useful even on non-optimized IR. +Value *Lint::findValue(Value *V, bool OffsetOk) const { + SmallPtrSet<Value *, 4> Visited; + return findValueImpl(V, OffsetOk, Visited); +} + +/// findValueImpl - Implementation helper for findValue. +Value *Lint::findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const { + // Detect self-referential values. + if (!Visited.insert(V)) + return UndefValue::get(V->getType()); + + // TODO: Look through sext or zext cast, when the result is known to + // be interpreted as signed or unsigned, respectively. + // TODO: Look through eliminable cast pairs. + // TODO: Look through calls with unique return values. + // TODO: Look through vector insert/extract/shuffle. + V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts(); + if (LoadInst *L = dyn_cast<LoadInst>(V)) { + BasicBlock::iterator BBI = L; + BasicBlock *BB = L->getParent(); + SmallPtrSet<BasicBlock *, 4> VisitedBlocks; + for (;;) { + if (!VisitedBlocks.insert(BB)) break; + if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), + BB, BBI, 6, AA)) + return findValueImpl(U, OffsetOk, Visited); + if (BBI != BB->begin()) break; + BB = BB->getUniquePredecessor(); + if (!BB) break; + BBI = BB->end(); + } + } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (Value *W = PN->hasConstantValue(DT)) + return findValueImpl(W, OffsetOk, Visited); + } else if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { + if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), + Ex->idx_begin(), + Ex->idx_end())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // Same as above, but for ConstantExpr instead of Instruction. + if (Instruction::isCast(CE->getOpcode())) { + if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), + CE->getOperand(0)->getType(), + CE->getType(), + TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + } else if (CE->getOpcode() == Instruction::ExtractValue) { + const SmallVector<unsigned, 4> &Indices = CE->getIndices(); + if (Value *W = FindInsertedValue(CE->getOperand(0), + Indices.begin(), + Indices.end())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + } + + // As a last resort, try SimplifyInstruction or constant folding. 
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) { + if (Value *W = SimplifyInstruction(Inst, TD)) + if (W != Inst) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + + return V; +} + +//===----------------------------------------------------------------------===// +// Implement the public interfaces to this file... +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createLintPass() { + return new Lint(); +} + +/// lintFunction - Check a function for errors, printing messages on stderr. +/// +void llvm::lintFunction(const Function &f) { + Function &F = const_cast<Function&>(f); + assert(!F.isDeclaration() && "Cannot lint external functions"); + + FunctionPassManager FPM(F.getParent()); + Lint *V = new Lint(); + FPM.add(V); + FPM.run(F); +} + +/// lintModule - Check a module for errors, printing messages on stderr. +/// +void llvm::lintModule(const Module &M) { + PassManager PM; + Lint *V = new Lint(); + PM.add(V); + PM.run(const_cast<Module&>(M)); +} diff --git a/contrib/llvm/lib/Analysis/LiveValues.cpp b/contrib/llvm/lib/Analysis/LiveValues.cpp new file mode 100644 index 0000000..0225f4f --- /dev/null +++ b/contrib/llvm/lib/Analysis/LiveValues.cpp @@ -0,0 +1,193 @@ +//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the implementation for the LLVM IR Value liveness +// analysis pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LiveValues.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +using namespace llvm; + +namespace llvm { + FunctionPass *createLiveValuesPass() { return new LiveValues(); } +} + +char LiveValues::ID = 0; +INITIALIZE_PASS(LiveValues, "live-values", + "Value Liveness Analysis", false, true); + +LiveValues::LiveValues() : FunctionPass(ID) {} + +void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTree>(); + AU.addRequired<LoopInfo>(); + AU.setPreservesAll(); +} + +bool LiveValues::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + + // This pass' values are computed lazily, so there's nothing to do here. + + return false; +} + +void LiveValues::releaseMemory() { + Memos.clear(); +} + +/// isUsedInBlock - Test if the given value is used in the given block. +/// +bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.Used.count(BB); +} + +/// isLiveThroughBlock - Test if the given value is known to be +/// live-through the given block, meaning that the block is properly +/// dominated by the value's definition, and there exists a block +/// reachable from it that contains a use. This uses a conservative +/// approximation that errs on the side of returning false. 
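+/// For example, if V is defined in block A and used in block C, the blocks
+/// strictly between A and C on the immediate-dominator chain from C back to
+/// A are the ones reported live-through.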
+/// +bool LiveValues::isLiveThroughBlock(const Value *V, + const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.LiveThrough.count(BB); +} + +/// isKilledInBlock - Test if the given value is known to be killed in +/// the given block, meaning that the block contains a use of the value, +/// and no blocks reachable from the block contain a use. This uses a +/// conservative approximation that errs on the side of returning false. +/// +bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) { + Memo &M = getMemo(V); + return M.Killed.count(BB); +} + +/// getMemo - Retrieve an existing Memo for the given value if one +/// is available, otherwise compute a new one. +/// +LiveValues::Memo &LiveValues::getMemo(const Value *V) { + DenseMap<const Value *, Memo>::iterator I = Memos.find(V); + if (I != Memos.end()) + return I->second; + return compute(V); +} + +/// getImmediateDominator - A handy utility for the specific DominatorTree +/// query that we need here. +/// +static const BasicBlock *getImmediateDominator(const BasicBlock *BB, + const DominatorTree *DT) { + DomTreeNode *Node = DT->getNode(const_cast<BasicBlock *>(BB))->getIDom(); + return Node ? Node->getBlock() : 0; +} + +/// compute - Compute a new Memo for the given value. +/// +LiveValues::Memo &LiveValues::compute(const Value *V) { + Memo &M = Memos[V]; + + // Determine the block containing the definition. + const BasicBlock *DefBB; + // Instructions define values with meaningful live ranges. + if (const Instruction *I = dyn_cast<Instruction>(V)) + DefBB = I->getParent(); + // Arguments can be analyzed as values defined in the entry block. + else if (const Argument *A = dyn_cast<Argument>(V)) + DefBB = &A->getParent()->getEntryBlock(); + // Constants and other things aren't meaningful here, so just + // return having computed an empty Memo so that we don't come + // here again. The assumption here is that client code won't + // be asking about such values very often. + else + return M; + + // Determine if the value is defined inside a loop. This is used + // to track whether the value is ever used outside the loop, so + // it'll be set to null if the value is either not defined in a + // loop or used outside the loop in which it is defined. + const Loop *L = LI->getLoopFor(DefBB); + + // Track whether the value is used anywhere outside of the block + // in which it is defined. + bool LiveOutOfDefBB = false; + + // Examine each use of the value. + for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); + I != E; ++I) { + const User *U = *I; + const BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + + // Note the block in which this use occurs. + M.Used.insert(UseBB); + + // If the use block doesn't have successors, the value can be + // considered killed. + if (succ_begin(UseBB) == succ_end(UseBB)) + M.Killed.insert(UseBB); + + // Observe whether the value is used outside of the loop in which + // it is defined. Switch to an enclosing loop if necessary. + for (; L; L = L->getParentLoop()) + if (L->contains(UseBB)) + break; + + // Search for live-through blocks. + const BasicBlock *BB; + if (const PHINode *PHI = dyn_cast<PHINode>(U)) { + // For PHI nodes, start the search at the incoming block paired with the + // incoming value, which must be dominated by the definition. 
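+        // E.g. for a use "phi [%v, %pred]" the walk starts at %pred rather
+        // than at the PHI's own block; %v need only be live across that edge.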
+        unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo());
+        BB = PHI->getIncomingBlock(Num);
+
+        // A PHI-node use means the value is live-out of its defining block
+        // even if that block also contains the only use.
+        LiveOutOfDefBB = true;
+      } else {
+        // Otherwise just start the search at the use.
+        BB = UseBB;
+
+        // Note if the use is outside the defining block.
+        LiveOutOfDefBB |= UseBB != DefBB;
+      }
+
+      // Climb the immediate dominator tree from the use to the definition
+      // and mark all intermediate blocks as live-through.
+      for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) {
+        if (BB != UseBB && !M.LiveThrough.insert(BB))
+          break;
+      }
+    }
+
+  // If the value is defined inside a loop and is not live outside
+  // the loop, then each exit block of the loop in which the value
+  // is used is a kill block.
+  if (L) {
+    SmallVector<BasicBlock *, 4> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      const BasicBlock *ExitingBlock = ExitingBlocks[i];
+      if (M.Used.count(ExitingBlock))
+        M.Killed.insert(ExitingBlock);
+    }
+  }
+
+  // If the value was never used outside the block in which it was
+  // defined, it's killed in that block.
+  if (!LiveOutOfDefBB)
+    M.Killed.insert(DefBB);
+
+  return M;
+}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
new file mode 100644
index 0000000..2ba1d86
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -0,0 +1,235 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+///   %t0 = getelementptr \@a, 0, 3
+///   store i32 0, i32* %t0
+///   %t1 = getelementptr \@a, 0, 3
+///   %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+  // Test if the values are trivially equivalent.
+  if (A == B) return true;
+
+  // Test if the values come from identical arithmetic instructions.
+  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+  // this function is only used when one address use dominates the
+  // other, which means that they'll always either have the same
+  // value or one of them will have an undefined value.
+  if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+      isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+    if (const Instruction *BI = dyn_cast<Instruction>(B))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+        return true;
+
+  // Otherwise they may not be equivalent.
+  return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
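+/// For example, "getelementptr [8 x i32]* @G, i32 0, i32 2" yields @G with
+/// ByteOffset advanced by 8, assuming the usual 4-byte i32 layout from
+/// TargetData.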
+/// This is closely related to Value::getUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+                                            uint64_t &ByteOffset,
+                                            unsigned MaxLookup = 6) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!GEP->hasAllConstantIndices())
+        return V;
+      SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+      ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+                                         &Indices[0], Indices.size());
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+                                       unsigned Align, const TargetData *TD) {
+  uint64_t ByteOffset = 0;
+  Value *Base = V;
+  if (TD)
+    Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+  const Type *BaseType = 0;
+  unsigned BaseAlign = 0;
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // An alloca is safe to load from as long as it is suitably aligned.
+    BaseType = AI->getAllocatedType();
+    BaseAlign = AI->getAlignment();
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+    // Global variables are safe to load from but their size cannot be
+    // guaranteed if they may be overridden.
+    if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+      BaseType = GV->getType()->getElementType();
+      BaseAlign = GV->getAlignment();
+    }
+  }
+
+  if (BaseType && BaseType->isSized()) {
+    if (TD && BaseAlign == 0)
+      BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+    if (Align <= BaseAlign) {
+      if (!TD)
+        return true; // Loading directly from an alloca or global is OK.
+
+      // Check if the load is within the bounds of the underlying object.
+      const PointerType *AddrTy = cast<PointerType>(V->getType());
+      uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+      if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+          (Align == 0 || (ByteOffset % Align) == 0))
+        return true;
+    }
+  }
+
+  // Otherwise, be a little bit aggressive by scanning the local block where we
+  // want to check to see if the pointer is already being loaded or stored
+  // from/to. If so, the previous load or store would have already trapped,
+  // so there is no harm doing an extra load (also, CSE will later eliminate
+  // the load entirely).
+  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+  while (BBI != E) {
+    --BBI;
+
+    // If we see a free or a call which may write to memory (i.e. which might
+    // do a free) the pointer could be marked invalid.
+    if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+        !isa<DbgInfoIntrinsic>(BBI))
+      return false;
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+      if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+      if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+    }
+  }
+  return false;
+}
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of
+/// instructions. If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null. ScanFrom could also be left pointing just
+/// past an instruction that would invalidate *Ptr, in which case null is
+/// returned as well.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+                                      BasicBlock::iterator &ScanFrom,
+                                      unsigned MaxInstsToScan,
+                                      AliasAnalysis *AA) {
+  if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+  // If we're using alias analysis to disambiguate, get the size of *Ptr.
+  unsigned AccessSize = 0;
+  if (AA) {
+    const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+    AccessSize = AA->getTypeStoreSize(AccessTy);
+  }
+
+  while (ScanFrom != ScanBB->begin()) {
+    // We must ignore debug info directives when counting (otherwise they
+    // would affect codegen).
+    Instruction *Inst = --ScanFrom;
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    // Restore ScanFrom to expected value in case next test succeeds
+    ScanFrom++;
+
+    // Don't scan huge blocks.
+    if (MaxInstsToScan-- == 0) return 0;
+
+    --ScanFrom;
+    // If this is a load of Ptr, the loaded value is available.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+        return LI;
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // If this is a store through Ptr, the value is available!
+      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+        return SI->getOperand(0);
+
+      // If Ptr is an alloca and this is a store to a different alloca, ignore
+      // the store. This is a trivial form of alias analysis that is important
+      // for reg2mem'd code.
+      if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+          (isa<AllocaInst>(SI->getOperand(1)) ||
+           isa<GlobalVariable>(SI->getOperand(1))))
+        continue;
+
+      // If we have alias analysis and it says the store won't modify the
+      // loaded value, ignore the store.
+      if (AA &&
+          (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // Otherwise the store may or may not alias the pointer; bail out.
+      ++ScanFrom;
+      return 0;
+    }
+
+    // If this is some other instruction that may clobber Ptr, bail out.
+    if (Inst->mayWriteToMemory()) {
+      // If alias analysis claims that it really won't modify the load,
+      // ignore it.
+      if (AA &&
+          (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // May modify the pointer, bail out.
+      ++ScanFrom;
+      return 0;
+    }
+  }
+
+  // Got to the start of the block, we didn't find it, but are done for this
+  // block.
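+  // Callers such as Lint::findValueImpl can then resume the scan in a unique
+  // predecessor, since ScanFrom was left at begin().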
+ return 0; +} diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp new file mode 100644 index 0000000..82c02dc --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp @@ -0,0 +1,351 @@ +//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the (beginning) of an implementation of a loop dependence analysis +// framework, which is used to detect dependences in memory accesses in loops. +// +// Please note that this is work in progress and the interface is subject to +// change. +// +// TODO: adapt as implementation progresses. +// +// TODO: document lingo (pair, subscript, index) +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lda" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopDependenceAnalysis.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Instructions.h" +#include "llvm/Operator.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +STATISTIC(NumAnswered, "Number of dependence queries answered"); +STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); +STATISTIC(NumDependent, "Number of pairs with dependent accesses"); +STATISTIC(NumIndependent, "Number of pairs with independent accesses"); +STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); + +LoopPass *llvm::createLoopDependenceAnalysisPass() { + return new LoopDependenceAnalysis(); +} + +INITIALIZE_PASS(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true); +char LoopDependenceAnalysis::ID = 0; + +//===----------------------------------------------------------------------===// +// Utility Functions +//===----------------------------------------------------------------------===// + +static inline bool IsMemRefInstr(const Value *V) { + const Instruction *I = dyn_cast<const Instruction>(V); + return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); +} + +static void GetMemRefInstrs(const Loop *L, + SmallVectorImpl<Instruction*> &Memrefs) { + for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); + b != be; ++b) + for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); + i != ie; ++i) + if (IsMemRefInstr(i)) + Memrefs.push_back(i); +} + +static bool IsLoadOrStoreInst(Value *I) { + return isa<LoadInst>(I) || isa<StoreInst>(I); +} + +static Value *GetPointerOperand(Value *I) { + if (LoadInst *i = dyn_cast<LoadInst>(I)) + return i->getPointerOperand(); + if (StoreInst *i = dyn_cast<StoreInst>(I)) + return i->getPointerOperand(); + llvm_unreachable("Value is no load or store instruction!"); + // Never reached. 
+ return 0; +} + +static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *aObj = A->getUnderlyingObject(); + const Value *bObj = B->getUnderlyingObject(); + return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), + bObj, AA->getTypeStoreSize(bObj->getType())); +} + +static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { + return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); +} + +//===----------------------------------------------------------------------===// +// Dependence Testing +//===----------------------------------------------------------------------===// + +bool LoopDependenceAnalysis::isDependencePair(const Value *A, + const Value *B) const { + return IsMemRefInstr(A) && + IsMemRefInstr(B) && + (cast<const Instruction>(A)->mayWriteToMemory() || + cast<const Instruction>(B)->mayWriteToMemory()); +} + +bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, + Value *B, + DependencePair *&P) { + void *insertPos = 0; + FoldingSetNodeID id; + id.AddPointer(A); + id.AddPointer(B); + + P = Pairs.FindNodeOrInsertPos(id, insertPos); + if (P) return true; + + P = new (PairAllocator) DependencePair(id, A, B); + Pairs.InsertNode(P, insertPos); + return false; +} + +void LoopDependenceAnalysis::getLoops(const SCEV *S, + DenseSet<const Loop*>* Loops) const { + // Refactor this into an SCEVVisitor, if efficiency becomes a concern. + for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) + if (!S->isLoopInvariant(L)) + Loops->insert(L); +} + +bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { + DenseSet<const Loop*> loops; + getLoops(S, &loops); + return loops.empty(); +} + +bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { + const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S); + return isLoopInvariant(S) || (rec && rec->isAffine()); +} + +bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { + return isLoopInvariant(A) && isLoopInvariant(B); +} + +bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { + DenseSet<const Loop*> loops; + getLoops(A, &loops); + getLoops(B, &loops); + return loops.size() == 1; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseZIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); + return A == B ? Dependent : Independent; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseMIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSubscript(const SCEV *A, + const SCEV *B, + Subscript *S) const { + DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n"); + + if (A == B) { + DEBUG(dbgs() << " -> [D] same SCEV\n"); + return Dependent; + } + + if (!isAffine(A) || !isAffine(B)) { + DEBUG(dbgs() << " -> [?] 
not affine\n");
+    return Unknown;
+  }
+
+  if (isZIVPair(A, B))
+    return analyseZIV(A, B, S);
+
+  if (isSIVPair(A, B))
+    return analyseSIV(A, B, S);
+
+  return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+  DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+  // We only analyse loads and stores; other possible memory accesses, e.g. by
+  // free, call, or invoke instructions, are not handled.
+  if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+    DEBUG(dbgs() << "--> [?] no load/store\n");
+    return Unknown;
+  }
+
+  Value *aPtr = GetPointerOperand(P->A);
+  Value *bPtr = GetPointerOperand(P->B);
+
+  switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+  case AliasAnalysis::MayAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+    DEBUG(dbgs() << "---> [?] may alias\n");
+    return Unknown;
+
+  case AliasAnalysis::NoAlias:
+    // If the objects noalias, they are distinct and the accesses are
+    // independent.
+    DEBUG(dbgs() << "---> [I] no alias\n");
+    return Independent;
+
+  case AliasAnalysis::MustAlias:
+    break; // The underlying objects alias, test accesses for dependence.
+  }
+
+  const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+  const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+  if (!aGEP || !bGEP)
+    return Unknown;
+
+  // FIXME: Is filtering coupled subscripts necessary?
+
+  // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+  // trailing zeroes to the smaller GEP, if needed.
+  typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+  GEPOpdPairsTy opds;
+  for (GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+                                      aEnd = aGEP->idx_end(),
+                                      bIdx = bGEP->idx_begin(),
+                                      bEnd = bGEP->idx_end();
+       aIdx != aEnd && bIdx != bEnd;
+       aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+    const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+    const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+    opds.push_back(std::make_pair(aSCEV, bSCEV));
+  }
+
+  if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets, so bail out when
+    // the leading (pointer) subscripts of the two GEPs differ.
+    //
+    // TODO: this could be relaxed by adding the size of the underlying object
+    // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we
+    // know that x is a [100 x i8]*, we could modify the first subscript to be
+    // (i, 200-i) instead of (i, -i).
+    return Unknown;
+  }
+
+  // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+  for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+       i != end; ++i) {
+    Subscript subscript;
+    DependenceResult result = analyseSubscript(i->first, i->second, &subscript);
+    if (result != Dependent) {
+      // We either proved independence or failed to analyse this subscript.
+      // Further subscripts will not improve the situation, so abort early.
+      return result;
+    }
+    P->Subscripts.push_back(subscript);
+  }
+  // We successfully analysed all subscripts but failed to prove independence.
+  return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+  assert(isDependencePair(A, B) && "Values form no dependence pair!");
+  ++NumAnswered;
+
+  DependencePair *p;
+  if (!findOrInsertDependencePair(A, B, p)) {
+    // The pair is not cached, so analyse it.
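+    // The result is memoized in the FoldingSet, so subsequent depends()
+    // queries on the same pair return the cached p->Result.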
+ ++NumAnalysed; + switch (p->Result = analysePair(p)) { + case Dependent: ++NumDependent; break; + case Independent: ++NumIndependent; break; + case Unknown: ++NumUnknown; break; + } + } + return p->Result != Independent; +} + +//===----------------------------------------------------------------------===// +// LoopDependenceAnalysis Implementation +//===----------------------------------------------------------------------===// + +bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { + this->L = L; + AA = &getAnalysis<AliasAnalysis>(); + SE = &getAnalysis<ScalarEvolution>(); + return false; +} + +void LoopDependenceAnalysis::releaseMemory() { + Pairs.clear(); + PairAllocator.Reset(); +} + +void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<ScalarEvolution>(); +} + +static void PrintLoopInfo(raw_ostream &OS, + LoopDependenceAnalysis *LDA, const Loop *L) { + if (!L->empty()) return; // ignore non-innermost loops + + SmallVector<Instruction*, 8> memrefs; + GetMemRefInstrs(L, memrefs); + + OS << "Loop at depth " << L->getLoopDepth() << ", header block: "; + WriteAsOperand(OS, L->getHeader(), false); + OS << "\n"; + + OS << " Load/store instructions: " << memrefs.size() << "\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; + + OS << " Pairwise dependence results:\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + for (SmallVector<Instruction*, 8>::const_iterator y = x + 1; + y != end; ++y) + if (LDA->isDependencePair(*x, *y)) + OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) + << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent") + << "\n"; +} + +void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { + // TODO: doc why const_cast is safe + PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L); +} diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp new file mode 100644 index 0000000..46219d1 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -0,0 +1,413 @@ +//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoopInfo class that is used to identify natural loops +// and determine the loop depth of various nodes of the CFG. Note that the +// loops identified may actually be several natural loops that share the same +// header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <algorithm> +using namespace llvm; + +// Always verify loopinfo if expensive checking is enabled. 
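+// (XDEBUG builds default it to true; -verify-loop-info toggles it at run
+// time through cl::location.)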
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+                cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant.
+///
+bool Loop::isLoopInvariant(Value *V) const {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    return isLoopInvariant(I);
+  return true;  // All non-instructions are loop invariant
+}
+
+/// isLoopInvariant - Return true if the specified instruction is
+/// loop-invariant.
+///
+bool Loop::isLoopInvariant(Instruction *I) const {
+  return !contains(I);
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant.  This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+                             Instruction *InsertPt) const {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    return makeLoopInvariant(I, Changed, InsertPt);
+  return true;  // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant.  This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+                             Instruction *InsertPt) const {
+  // Test if the value is already loop-invariant.
+  if (isLoopInvariant(I))
+    return true;
+  if (!I->isSafeToSpeculativelyExecute())
+    return false;
+  if (I->mayReadFromMemory())
+    return false;
+  // Determine the insertion point, unless one was given.
+  if (!InsertPt) {
+    BasicBlock *Preheader = getLoopPreheader();
+    // Without a preheader, hoisting is not feasible.
+    if (!Preheader)
+      return false;
+    InsertPt = Preheader->getTerminator();
+  }
+  // Don't hoist instructions with loop-variant operands.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+      return false;
+  // Hoist.
+  I->moveBefore(InsertPt);
+  Changed = true;
+  return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop.  If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+  BasicBlock *H = getHeader();
+
+  BasicBlock *Incoming = 0, *Backedge = 0;
+  pred_iterator PI = pred_begin(H);
+  assert(PI != pred_end(H) &&
+         "Loop must have at least one backedge!");
+  Backedge = *PI++;
+  if (PI == pred_end(H)) return 0;  // dead loop
+  Incoming = *PI++;
+  if (PI != pred_end(H)) return 0;  // multiple backedges?
+
+  if (contains(Incoming)) {
+    if (contains(Backedge))
+      return 0;
+    std::swap(Incoming, Backedge);
+  } else if (!contains(Backedge))
+    return 0;
+
+  // Loop over all of the PHI nodes, looking for a canonical indvar.
+  for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+      if (CI->isNullValue())
+        if (Instruction *Inc =
+            dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+          if (Inc->getOpcode() == Instruction::Add &&
+              Inc->getOperand(0) == PN)
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+              if (CI->equalsInt(1))
+                return PN;
+  }
+  return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed.  Note that this means that the backedge
+/// of the loop executes N-1 times.  If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+  // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+  // canonical induction variable and V is the trip count of the loop.
+  PHINode *IV = getCanonicalInductionVariable();
+  if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
+
+  bool P0InLoop = contains(IV->getIncomingBlock(0));
+  Value *Inc = IV->getIncomingValue(!P0InLoop);
+  BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+    if (BI->isConditional()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+        if (ICI->getOperand(0) == Inc) {
+          if (BI->getSuccessor(0) == getHeader()) {
+            if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+              return ICI->getOperand(1);
+          } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+            return ICI->getOperand(1);
+          }
+        }
+      }
+    }
+
+  return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible.  Returns 0 if the trip count is
+/// unknown or not constant.  Will also return 0 if the trip count is very
+/// large (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+  Value* TripCount = this->getTripCount();
+  if (TripCount) {
+    if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+      // Guard against huge trip counts.
+      if (TripCountC->getValue().getActiveBits() <= 32) {
+        return (unsigned)TripCountC->getZExtValue();
+      }
+    }
+  }
+  return 0;
+}
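+
+// A minimal usage sketch for the two queries above; the helper name and
+// threshold are hypothetical, not LLVM API.  A result of 0 means "unknown,
+// non-constant, or >= 2^32", so it never enables the transform.
+static bool isSmallConstantLoop(const Loop *L, unsigned Threshold) {
+  unsigned TC = L->getSmallConstantTripCount();
+  return TC != 0 && TC <= Threshold;   // e.g. candidate for full unrolling
+}
+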
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible.  This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant, use getSmallConstantTripCount for that case).  It will also
+/// return 1 if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+  Value* TripCount = this->getTripCount();
+  // This will hold the ConstantInt result, if any
+  ConstantInt *Result = NULL;
+  if (TripCount) {
+    // See if the trip count is constant itself
+    Result = dyn_cast<ConstantInt>(TripCount);
+    // If not, see if it is a multiplication
+    if (!Result)
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+        switch (BO->getOpcode()) {
+        case BinaryOperator::Mul:
+          Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+          break;
+        case BinaryOperator::Shl:
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+            if (CI->getValue().getActiveBits() <= 5)
+              return 1u << CI->getZExtValue();
+          break;
+        default:
+          break;
+        }
+      }
+  }
+  // Guard against huge trip counts.
+  if (Result && Result->getValue().getActiveBits() <= 32) {
+    return (unsigned)Result->getZExtValue();
+  } else {
+    return 1;
+  }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // Collect the loop's blocks in a set for quick membership lookups.
+  SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+  for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+    BasicBlock *BB = *BI;
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+           ++UI) {
+        User *U = *UI;
+        BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+        if (PHINode *P = dyn_cast<PHINode>(U))
+          UserBB = P->getIncomingBlock(UI);
+
+        // Check the current block, as a fast-path, before checking whether
+        // the use is anywhere in the loop.  Most values are used in the same
+        // block they are defined in.  Also, blocks not reachable from the
+        // entry are special; uses in them don't need to go through PHIs.
+        if (UserBB != BB &&
+            !LoopBBs.count(UserBB) &&
+            DT.isReachableFromEntry(UserBB))
+          return false;
+      }
+  }
+
+  return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+  // Normal-form loops have a preheader, a single backedge, and all of their
+  // exits have all their predecessors inside the loop.
+  return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+  // Collect the loop's blocks in a set for quick membership lookups.
+  SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+  // Each predecessor of each exit block of a normal loop is contained
+  // within the loop.
+  SmallVector<BasicBlock *, 4> ExitBlocks;
+  getExitBlocks(ExitBlocks);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+         PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+      if (!LoopBBs.count(*PI))
+        return false;
+  // All the requirements are met.
+  return true;
+}
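+
+// The arithmetic behind getSmallConstantTripMultiple earlier in this file,
+// restated on plain integers (illustrative only): a trip count of the form
+// `x * C` makes every possible trip count a multiple of C, and `x << C`
+// a multiple of 1u << C.
+static unsigned tripMultipleFor(bool IsShift, unsigned C) {
+  if (IsShift)
+    return C < 32 ? (1u << C) : 1;   // mirrors the active-bits <= 5 guard
+  return C ? C : 1;                  // a zero multiplier tells us nothing
+}
+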
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
+  assert(hasDedicatedExits() &&
+         "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+  // Sort the blocks vector so that we can use binary search to do quick
+  // lookups.
+  SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
+  std::sort(LoopBBs.begin(), LoopBBs.end());
+
+  SmallVector<BasicBlock *, 32> switchExitBlocks;
+
+  for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+    BasicBlock *current = *BI;
+    switchExitBlocks.clear();
+
+    for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+      // If the block is inside the loop then it is not an exit block.
+      if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+        continue;
+
+      pred_iterator PI = pred_begin(*I);
+      BasicBlock *firstPred = *PI;
+
+      // Only insert the exit block into the output ExitBlocks vector if the
+      // current basic block is this exit block's first predecessor.  This
+      // ensures that the same exit block is not inserted twice into the
+      // ExitBlocks vector.
+      if (current != firstPred)
+        continue;
+
+      // If a terminator has more than two successors, for example SwitchInst,
+      // then it is possible that there are multiple edges from the current
+      // block to one exit block.
+      if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+        ExitBlocks.push_back(*I);
+        continue;
+      }
+
+      // In case of multiple edges from the current block to the exit block,
+      // collect only one edge in ExitBlocks.  Use switchExitBlocks to keep
+      // track of duplicate edges.
+      if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+          == switchExitBlocks.end()) {
+        switchExitBlocks.push_back(*I);
+        ExitBlocks.push_back(*I);
+      }
+    }
+  }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+  SmallVector<BasicBlock *, 8> UniqueExitBlocks;
+  getUniqueExitBlocks(UniqueExitBlocks);
+  if (UniqueExitBlocks.size() == 1)
+    return UniqueExitBlocks[0];
+  return 0;
+}
+
+void Loop::dump() const {
+  print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+  releaseMemory();
+  LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
+  return false;
+}
+
+void LoopInfo::verifyAnalysis() const {
+  // LoopInfo is a FunctionPass, but verifying every loop in the function
+  // each time verifyAnalysis is called is very expensive. The
+  // -verify-loop-info option can enable this. In order to perform some
+  // checking by default, LoopPass has been taught to call verifyLoop
+  // manually during loop pass sequences.
+
+  if (!VerifyLoopInfo) return;
+
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+    (*I)->verifyLoopNest();
+  }
+
+  // TODO: check BBMap consistency.
+}
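+
+// A plain-STL model of the technique getUniqueExitBlocks above uses: sort the
+// loop's blocks once, then answer "is this successor still inside?" with
+// binary search while recording each outside successor only once.  Types are
+// illustrative, not LLVM API; assumes <algorithm> and <vector>.
+#include <vector>
+static void collectUniqueOutside(std::vector<int> Inside,
+                                 const std::vector<int> &Succs,
+                                 std::vector<int> &Out) {
+  std::sort(Inside.begin(), Inside.end());
+  for (size_t i = 0, e = Succs.size(); i != e; ++i) {
+    if (std::binary_search(Inside.begin(), Inside.end(), Succs[i]))
+      continue;                                  // still inside: not an exit
+    if (std::find(Out.begin(), Out.end(), Succs[i]) == Out.end())
+      Out.push_back(Succs[i]);                   // record first sighting only
+  }
+}
+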
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+  LI.print(OS);
+}
+
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
new file mode 100644
index 0000000..15d4db8
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,404 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass. LPPassManager is
+// responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+  std::string Banner;
+  raw_ostream &Out;       // raw_ostream to print on.
+
+public:
+  static char ID;
+  PrintLoopPass() : LoopPass(ID), Out(dbgs()) {}
+  PrintLoopPass(const std::string &B, raw_ostream &o)
+      : LoopPass(ID), Banner(B), Out(o) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &) {
+    Out << Banner;
+    for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+         b != be;
+         ++b) {
+      (*b)->print(Out);
+    }
+    return false;
+  }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager(int Depth)
+  : FunctionPass(ID), PMDataManager(Depth) {
+  skipThisLoop = false;
+  redoThisLoop = false;
+  LI = NULL;
+  CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+  if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+    // Reparent all of the blocks in this loop.  Since BBLoop had a parent,
+    // they are now all in it.
+    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+         I != E; ++I)
+      if (LI->getLoopFor(*I) == L)    // Don't change blocks in subloops.
+        LI->changeLoopFor(*I, ParentLoop);
+
+    // Remove the loop from its parent loop.
+    for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+         ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        ParentLoop->removeChildLoop(I);
+        break;
+      }
+    }
+
+    // Move all subloops into the parent loop.
+    while (!L->empty())
+      ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+  } else {
+    // Reparent all of the blocks in this loop.  Since BBLoop had no parent,
+    // they are no longer in a loop at all.
+
+    for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+      // Don't change blocks in subloops.
+      if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+        LI->removeBlock(L->getBlocks()[i]);
+        --i;
+      }
+    }
+
+    // Remove the loop from the top-level LoopInfo object.
+    for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        LI->removeLoop(I);
+        break;
+      }
+    }
+
+    // Move all of the subloops to the top-level.
+    while (!L->empty())
+      LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+  }
+
+  delete L;
+
+  // If L is current loop then skip rest of the passes and let
+  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+  // and continue applying other passes on CurrentLoop.
+  if (CurrentLoop == L) {
+    skipThisLoop = true;
+    return;
+  }
+
+  for (std::deque<Loop *>::iterator I = LQ.begin(),
+       E = LQ.end(); I != E; ++I) {
+    if (*I == L) {
+      LQ.erase(I);
+      break;
+    }
+  }
+}
+
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+  assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+  // Insert into loop nest
+  if (ParentLoop)
+    ParentLoop->addChildLoop(L);
+  else
+    LI->addTopLevelLoop(L);
+
+  insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+  // Insert L into loop queue
+  if (L == CurrentLoop)
+    redoLoop(L);
+  else if (!L->getParentLoop())
+    // This is top level loop.
+    LQ.push_front(L);
+  else {
+    // Insert L after the parent loop.
+    for (std::deque<Loop *>::iterator I = LQ.begin(),
+         E = LQ.end(); I != E; ++I) {
+      if (*I == L->getParentLoop()) {
+        // deque does not support insert after.
+        ++I;
+        LQ.insert(I, 1, L);
+        break;
+      }
+    }
+  }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change the loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+  assert (CurrentLoop == L && "Can redo only CurrentLoop");
+  redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+                                                  BasicBlock *To, Loop *L) {
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *LP = getContainedPass(Index);
+    LP->cloneBasicBlockAnalysis(From, To, L);
+  }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+         ++BI) {
+      Instruction &I = *BI;
+      deleteSimpleAnalysisValue(&I, L);
+    }
+  }
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *LP = getContainedPass(Index);
+    LP->deleteAnalysisValue(V, L);
+  }
+}
+
+
+// Recurse through all subloops and add all the loops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+  LQ.push_back(L);
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+  // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+  // become part of LPPassManager.
+  Info.addRequired<LoopInfo>();
+  Info.setPreservesAll();
+}
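+
+// std::deque offers no insert-after, so insertLoopIntoQueue above advances
+// the iterator before calling insert.  The same idiom in isolation (names
+// illustrative, not LLVM API); assumes <deque>.
+#include <deque>
+static void insertAfter(std::deque<int> &Q, int After, int V) {
+  for (std::deque<int>::iterator I = Q.begin(), E = Q.end(); I != E; ++I)
+    if (*I == After) {
+      ++I;                  // position one past the match...
+      Q.insert(I, 1, V);    // ...is where "after" lands; iterators die here
+      return;
+    }
+  Q.push_back(V);           // sketch's choice: append when After is absent
+}
+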
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+  LI = &getAnalysis<LoopInfo>();
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  // Populate Loop Queue
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    addLoopIntoQueue(*I, LQ);
+
+  if (LQ.empty()) // No loops, skip calling finalizers
+    return false;
+
+  // Initialization
+  for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+       I != E; ++I) {
+    Loop *L = *I;
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      LoopPass *P = getContainedPass(Index);
+      Changed |= P->doInitialization(L, *this);
+    }
+  }
+
+  // Walk Loops
+  while (!LQ.empty()) {
+
+    CurrentLoop  = LQ.back();
+    skipThisLoop = false;
+    redoThisLoop = false;
+
+    // Run all passes on the current Loop.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      LoopPass *P = getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+                   CurrentLoop->getHeader()->getName());
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      {
+        PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+        TimeRegion PassTimer(getPassTimer(P));
+
+        Changed |= P->runOnLoop(CurrentLoop, *this);
+      }
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+                     skipThisLoop ? "<deleted>" :
+                                    CurrentLoop->getHeader()->getName());
+      dumpPreservedSet(P);
+
+      if (!skipThisLoop) {
+        // Manually check that this loop is still healthy. This is done
+        // instead of relying on LoopInfo::verifyLoop since LoopInfo
+        // is a function pass and it's really expensive to verify every
+        // loop in the function every time. That level of checking can be
+        // enabled with the -verify-loop-info option.
+        {
+          TimeRegion PassTimer(getPassTimer(LI));
+          CurrentLoop->verifyLoop();
+        }
+
+        // Then call the regular verifyAnalysis functions.
+        verifyPreservedAnalysis(P);
+      }
+
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P,
+                       skipThisLoop ? "<deleted>" :
+                                      CurrentLoop->getHeader()->getName(),
+                       ON_LOOP_MSG);
+
+      if (skipThisLoop)
+        // Do not run other passes on this loop.
+        break;
+    }
+
+    // If the loop was deleted, release all the loop passes. This frees up
+    // some memory, and avoids trouble with the pass manager trying to call
+    // verifyAnalysis on them.
+    if (skipThisLoop)
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+        Pass *P = getContainedPass(Index);
+        freePass(P, "<deleted>", ON_LOOP_MSG);
+      }
+
+    // Pop the loop from queue after running all passes.
+    LQ.pop_back();
+
+    if (redoThisLoop)
+      LQ.push_back(CurrentLoop);
+  }
+
+  // Finalization
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *P = getContainedPass(Index);
+    Changed |= P->doFinalization();
+  }
+
+  return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+  errs().indent(Offset*2) << "Loop Pass Manager\n";
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    P->dumpPassStructure(Offset + 1);
+    dumpLastUses(P, Offset+1);
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const {
+  return new PrintLoopPass(Banner, O);
+}
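+
+// A reduced model of the queue discipline in runOnFunction above: the current
+// item stays at the back of the queue while it is processed; a skip flag
+// abandons it and a redo flag re-queues it after it is popped.  The callback
+// is a stand-in, not LLVM API; assumes <deque>.
+static void drainLoopQueue(std::deque<int> &LQ,
+                           void (*RunPasses)(int, bool &Skip, bool &Redo)) {
+  while (!LQ.empty()) {
+    int Cur = LQ.back();
+    bool Skip = false, Redo = false;
+    RunPasses(Cur, Skip, Redo);
+    LQ.pop_back();              // pop only after every pass has run
+    if (Redo && !Skip)
+      LQ.push_back(Cur);        // redoLoop(): beware unbounded re-queueing
+  }
+}
+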
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available.  This pass P is not suitable for an LPPassManager if P
+// is not preserving higher level analysis info used by other
+// LPPassManager passes.  In such case, pop LPPassManager from the
+// stack.  This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+  // Find LPPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+    PMS.pop();
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by LPM then do not insert
+  // this pass in current LPM. Use new LPPassManager.
+  if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+      !PMS.top()->preserveHigherLevelAnalysis(this))
+    PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
+void LoopPass::assignPassManager(PMStack &PMS,
+                                 PassManagerType PreferredType) {
+  // Find LPPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+    PMS.pop();
+
+  LPPassManager *LPPM;
+  if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+    LPPM = (LPPassManager*)PMS.top();
+  else {
+    // Create new Loop Pass Manager if it does not exist.
+    assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Loop Pass Manager
+    LPPM = new LPPassManager(PMD->getDepth() + 1);
+    LPPM->populateInheritedAnalysis(PMS);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(LPPM);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    Pass *P = LPPM->getAsPass();
+    TPM->schedulePass(P);
+
+    // [4] Push new manager into PMS
+    PMS.push(LPPM);
+  }
+
+  LPPM->add(this);
+}
diff --git a/contrib/llvm/lib/Analysis/Makefile b/contrib/llvm/lib/Analysis/Makefile
new file mode 100644
index 0000000..4af6d35
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Analysis/Makefile -------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMAnalysis
+DIRS = IPA
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 0000000..1ab18ca
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,207 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+  return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *Callee = CI->getCalledFunction();
+  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
+    return false;
+
+  // Check malloc prototype.
+  // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+  // attribute exists.
+  const FunctionType *FTy = Callee->getFunctionType();
+  if (FTy->getNumParams() != 1)
+    return false;
+  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call.  Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+  const CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+  CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+  if (!BCI)
+    return false;
+
+  return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+  BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+  return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+                                      : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+  const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+  return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+                                      : NULL;
+}
+
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+                               bool LookThroughSExt = false) {
+  if (!CI)
+    return NULL;
+
+  // The size of the malloc's result type must be known to determine the array
+  // size.
+  const Type *T = getMallocAllocatedType(CI);
+  if (!T || !T->isSized() || !TD)
+    return NULL;
+
+  unsigned ElementSize = TD->getTypeAllocSize(T);
+  if (const StructType *ST = dyn_cast<StructType>(T))
+    ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+  // If malloc call's arg can be determined to be a multiple of ElementSize,
+  // return the multiple.  Otherwise, return NULL.
+  Value *MallocArg = CI->getArgOperand(0);
+  Value *Multiple = NULL;
+  if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+                      LookThroughSExt))
+    return Multiple;
+
+  return NULL;
+}
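+
+// Constant-only model of the computation above (the real ComputeMultiple also
+// handles symbolic multiples): an argument that is an exact multiple of the
+// element size yields the array length, anything else yields 0.  Hypothetical
+// helper, not LLVM API.
+static uint64_t constantArraySize(uint64_t ArgBytes, uint64_t ElemBytes) {
+  if (ElemBytes == 0 || ArgBytes % ElemBytes != 0)
+    return 0;                    // not provably a whole number of elements
+  return ArgBytes / ElemBytes;   // e.g. malloc(12 * sizeof(T)) yields 12
+}
+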
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1.  Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
+  const CallInst *CI = extractMallocCall(I);
+  Value *ArraySize = computeArraySize(CI, TD);
+
+  if (ArraySize &&
+      ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+    return CI;
+
+  // CI is a non-array malloc, or we can't figure out that it is an array
+  // malloc.
+  return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+///   0: PointerType is the call's return type.
+///   1: PointerType is the bitcast's result type.
+///  >1: Unique PointerType cannot be determined, return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+  assert(isMalloc(CI) && "getMallocType and not malloc call");
+
+  const PointerType *MallocType = NULL;
+  unsigned NumOfBitCastUses = 0;
+
+  // Determine if CallInst has a bitcast use.
+  for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+       UI != E; )
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+      MallocType = cast<PointerType>(BCI->getDestTy());
+      NumOfBitCastUses++;
+    }
+
+  // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+  if (NumOfBitCastUses == 1)
+    return MallocType;
+
+  // Malloc call was not bitcast, so type is the malloc function's return type.
+  if (NumOfBitCastUses == 0)
+    return cast<PointerType>(CI->getType());
+
+  // Type could not be determined.
+  return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+///   0: PointerType is the malloc call's return type.
+///   1: PointerType is the bitcast's result type.
+///  >1: Unique PointerType cannot be determined, return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+  const PointerType *PT = getMallocType(CI);
+  return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call.  If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple.  For non-array mallocs, the multiple is
+/// constant 1.  Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+                                bool LookThroughSExt) {
+  assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+  return computeArraySize(CI, TD, LookThroughSExt);
+}
+
+//===----------------------------------------------------------------------===//
+//  free Call Utility Functions.
+//
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
+  const CallInst *CI = dyn_cast<CallInst>(I);
+  if (!CI)
+    return 0;
+  Function *Callee = CI->getCalledFunction();
+  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free")
+    return 0;
+
+  // Check free prototype.
+  // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+  // attribute exists.
+  const FunctionType *FTy = Callee->getFunctionType();
+  if (!FTy->getReturnType()->isVoidTy())
+    return 0;
+  if (FTy->getNumParams() != 1)
+    return 0;
+  if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+    return 0;
+
+  return CI;
+}
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 0000000..d18d5ce
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1239 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on.  It builds on
+// alias analysis information, and tries to provide a lazy, caching interface
+// to a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+          "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+          "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+          "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+          "Number of block queries that were completely cached");
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep",
+                "Memory Dependence Analysis", false, true);
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+  LocalDeps.clear();
+  NonLocalDeps.clear();
+  NonLocalPointerDeps.clear();
+  ReverseLocalDeps.clear();
+  ReverseNonLocalDeps.clear();
+  ReverseNonLocalPtrDeps.clear();
+  PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+  AA = &getAnalysis<AliasAnalysis>();
+  if (PredCache == 0)
+    PredCache.reset(new PredIteratorCache());
+  return false;
+}
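+
+// Plain-STL model of RemoveFromReverseMap below: erase one value from a key's
+// set and drop the key once its set is empty, so the reverse index never
+// holds empty sets.  std::map/std::set stand in for DenseMap/SmallPtrSet;
+// assumes <map> and <set>.
+#include <map>
+#include <set>
+static void removeReverse(std::map<int, std::set<int> > &Rev, int Key, int Val) {
+  std::map<int, std::set<int> >::iterator It = Rev.find(Key);
+  if (It == Rev.end())
+    return;                 // sketch's choice: tolerate a missing key quietly
+  It->second.erase(Val);
+  if (It->second.empty())
+    Rev.erase(It);          // keep the invariant: no empty sets remain
+}
+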
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
+                                 Instruction *Inst, KeyTy Val) {
+  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+  InstIt = ReverseMap.find(Inst);
+  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+  bool Found = InstIt->second.erase(Val);
+  assert(Found && "Invalid reverse map!"); Found=Found;
+  if (InstIt->second.empty())
+    ReverseMap.erase(InstIt);
+}
+
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
+  // Walk backwards through the block, looking for dependencies
+  while (ScanIt != BB->begin()) {
+    Instruction *Inst = --ScanIt;
+
+    // If this inst is a memory op, get the pointer it accessed
+    Value *Pointer = 0;
+    uint64_t PointerSize = 0;
+    if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
+      Pointer = S->getPointerOperand();
+      PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
+    } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+      Pointer = V->getOperand(0);
+      PointerSize = AA->getTypeStoreSize(V->getType());
+    } else if (const CallInst *CI = isFreeCall(Inst)) {
+      Pointer = CI->getArgOperand(0);
+      // calls to free() erase the entire structure
+      PointerSize = ~0ULL;
+    } else if (CallSite InstCS = cast<Value>(Inst)) {
+      // Debug intrinsics don't cause dependences.
+      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+      // If these two calls do not interfere, look past it.
+      switch (AA->getModRefInfo(CS, InstCS)) {
+      case AliasAnalysis::NoModRef:
+        // If the two calls are the same, return InstCS as a Def, so that
+        // CS can be found redundant and eliminated.
+        if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+          return MemDepResult::getDef(Inst);
+
+        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+        // keep scanning.
+        continue;
+      default:
+        return MemDepResult::getClobber(Inst);
+      }
+    } else {
+      // Non-memory instruction.
+      continue;
+    }
+
+    if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
+      return MemDepResult::getClobber(Inst);
+  }
+
+  // No dependence found.  If this is the entry block of the function, it is a
+  // clobber, otherwise it is non-local.
+  if (BB != &BB->getParent()->getEntryBlock())
+    return MemDepResult::getNonLocal();
+  return MemDepResult::getClobber(ScanIt);
+}
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends.  If isLoad is true, this routine ignores may-aliases
+/// with read-only operations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
+                         BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+  Value *InvariantTag = 0;
+
+  // Walk backwards through the basic block, looking for dependencies.
+  while (ScanIt != BB->begin()) {
+    Instruction *Inst = --ScanIt;
+
+    // If we're in an invariant region, no dependencies can be found before
+    // we pass an invariant-begin marker.
+    if (InvariantTag == Inst) {
+      InvariantTag = 0;
+      continue;
+    }
+
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      // Debug intrinsics don't cause dependences.
+      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+
+      // If we pass an invariant-end marker, then we've just entered an
+      // invariant region and can start ignoring dependencies.
+      if (II->getIntrinsicID() == Intrinsic::invariant_end) {
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
+        AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr);
+        if (R == AliasAnalysis::MustAlias) {
+          InvariantTag = II->getArgOperand(0);
+          continue;
+        }
+
+      // If we reach a lifetime begin or end marker, then the query ends here
+      // because the value is undefined.
+      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
+        AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr);
+        if (R == AliasAnalysis::MustAlias)
+          return MemDepResult::getDef(II);
+      }
+    }
+
+    // If we're querying on a load and we're in an invariant region, we're
+    // done at this point.  Nothing a load depends on can live in an
+    // invariant region.
+    if (isLoad && InvariantTag) continue;
+
+    // Values depend on loads if the pointers are must aliased.  This means
+    // that a load depends on another must aliased load from the same value.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *Pointer = LI->getPointerOperand();
+      uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
+
+      // If we found a pointer, check if it could be the same as our pointer.
+      AliasAnalysis::AliasResult R =
+        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+      if (R == AliasAnalysis::NoAlias)
+        continue;
+
+      // May-alias loads don't depend on each other without a dependence.
+      if (isLoad && R == AliasAnalysis::MayAlias)
+        continue;
+      // Stores depend on may- and must-aliased loads; loads depend on
+      // must-aliased loads.
+      return MemDepResult::getDef(Inst);
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // There can't be stores to the value we care about inside an
+      // invariant region.
+      if (InvariantTag) continue;
+
+      // If alias analysis can tell that this store is guaranteed to not modify
+      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
+      // the query pointer points to constant memory etc.
+      if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+        continue;
+
+      // Ok, this store might clobber the query pointer.  Check to see if it is
+      // a must alias: in this case, we want to return this as a def.
+      Value *Pointer = SI->getPointerOperand();
+      uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+
+      // If we found a pointer, check if it could be the same as our pointer.
+      AliasAnalysis::AliasResult R =
+        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+
+      if (R == AliasAnalysis::NoAlias)
+        continue;
+      if (R == AliasAnalysis::MayAlias)
+        return MemDepResult::getClobber(Inst);
+      return MemDepResult::getDef(Inst);
+    }
+
+    // If this is an allocation, and if we know that the accessed pointer is to
+    // the allocation, return Def.  This means that there is no dependence and
+    // the access can be optimized based on that.  For example, a load could
+    // turn into undef.
+    // Note: Only determine this to be a malloc if Inst is the malloc call, not
+    // a subsequent bitcast of the malloc call result.  There can be stores to
+    // the malloced memory between the malloc call and its bitcast uses, and we
+    // need to continue scanning until the malloc call.
+    if (isa<AllocaInst>(Inst) ||
+        (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+      Value *AccessPtr = MemPtr->getUnderlyingObject();
+
+      if (AccessPtr == Inst ||
+          AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+        return MemDepResult::getDef(Inst);
+      continue;
+    }
+
+    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+    switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
+    case AliasAnalysis::NoModRef:
+      // If the call has no effect on the queried pointer, just ignore it.
+      continue;
+    case AliasAnalysis::Mod:
+      // If we're in an invariant region, we can ignore calls that ONLY
+      // modify the pointer.
+      if (InvariantTag) continue;
+      return MemDepResult::getClobber(Inst);
+    case AliasAnalysis::Ref:
+      // If the call is known to never store to the pointer, and if this is a
+      // load query, we can safely ignore it (scan past it).
+      if (isLoad)
+        continue;
+    default:
+      // Otherwise, there is a potential dependence.  Return a clobber.
+      return MemDepResult::getClobber(Inst);
+    }
+  }
+
+  // No dependence found.  If this is the entry block of the function, it is a
+  // clobber, otherwise it is non-local.
+  if (BB != &BB->getParent()->getEntryBlock())
+    return MemDepResult::getNonLocal();
+  return MemDepResult::getClobber(ScanIt);
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+  Instruction *ScanPos = QueryInst;
+
+  // Check for a cached result
+  MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+  // If the cached entry is non-dirty, just return it.  Note that this depends
+  // on MemDepResult's default constructing to 'dirty'.
+  if (!LocalCache.isDirty())
+    return LocalCache;
+
+  // Otherwise, if we have a dirty entry, we know we can start the scan at that
+  // instruction, which may save us some work.
+  if (Instruction *Inst = LocalCache.getInst()) {
+    ScanPos = Inst;
+
+    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+  }
+
+  BasicBlock *QueryParent = QueryInst->getParent();
+
+  Value *MemPtr = 0;
+  uint64_t MemSize = 0;
+
+  // Do the scan.
+  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+    // No dependence found.  If this is the entry block of the function, it is
+    // a clobber, otherwise it is non-local.
+    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+      LocalCache = MemDepResult::getNonLocal();
+    else
+      LocalCache = MemDepResult::getClobber(QueryInst);
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(QueryInst)) {
+    // If this is a volatile store, don't mess around with it.  Just return the
+    // previous instruction as a clobber.
+    if (SI->isVolatile())
+      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+    else {
+      MemPtr = SI->getPointerOperand();
+      MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+    }
+  } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
+    // If this is a volatile load, don't mess around with it.  Just return the
+    // previous instruction as a clobber.
+    if (LI->isVolatile())
+      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+    else {
+      MemPtr = LI->getPointerOperand();
+      MemSize = AA->getTypeStoreSize(LI->getType());
+    }
+  } else if (const CallInst *CI = isFreeCall(QueryInst)) {
+    MemPtr = CI->getArgOperand(0);
+    // calls to free() erase the entire structure, not just a field.
+    MemSize = ~0UL;
+  } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+    int IntrinsicID = 0;  // Intrinsic IDs start at 1.
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst);
+    if (II)
+      IntrinsicID = II->getIntrinsicID();
+
+    switch (IntrinsicID) {
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start:
+      MemPtr = II->getArgOperand(1);
+      MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+      break;
+    case Intrinsic::invariant_end:
+      MemPtr = II->getArgOperand(2);
+      MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+      break;
+    default:
+      CallSite QueryCS(QueryInst);
+      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+                                             QueryParent);
+      break;
+    }
+  } else {
+    // Non-memory instruction.
+    LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+  }
+
+  // If we need to do a pointer scan, make it happen.
+  if (MemPtr) {
+    bool isLoad = !QueryInst->mayWriteToMemory();
+    if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
+      isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+    }
+    LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
+                                          QueryParent);
+  }
+
+  // Remember the result!
+  if (Instruction *I = LocalCache.getInst())
+    ReverseLocalDeps[I].insert(QueryInst);
+
+  return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+                         int Count = -1) {
+  if (Count == -1) Count = Cache.size();
+  if (Count == 0) return;
+
+  for (unsigned i = 1; i != unsigned(Count); ++i)
+    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
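+
+// The local-cache idiom used by getDependency above, in isolation: map
+// operator[] default-constructs an entry whose state means "dirty", so a
+// first-time miss and a stale hit take the same recompute path.  Illustrative
+// types only, not LLVM API; assumes <map>.
+namespace {
+struct SketchEntry {
+  bool Dirty;
+  int Value;
+  SketchEntry() : Dirty(true), Value(0) {}   // default == needs recomputation
+};
+}
+static int cachedOrRecomputed(std::map<int, SketchEntry> &Cache, int Key) {
+  SketchEntry &E = Cache[Key];   // inserts a dirty entry on first lookup
+  if (!E.Dirty)
+    return E.Value;              // clean hit: no scan needed
+  E.Value = Key;                 // stand-in for the real block scan
+  E.Dirty = false;
+  return E.Value;
+}
+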
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across.  The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed.  Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+ "getNonLocalCallDependency should only be used on calls with non-local deps!");
+  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+  NonLocalDepInfo &Cache = CacheP.first;
+
+  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
+  /// the cached case, this can happen due to instructions being deleted etc.
+  /// In the uncached case, this starts out as the set of predecessors we care
+  /// about.
+  SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+  if (!Cache.empty()) {
+    // Okay, we have a cache entry.  If we know it is not dirty, just return it
+    // with no computation.
+    if (!CacheP.second) {
+      ++NumCacheNonLocal;
+      return Cache;
+    }
+
+    // If we already have a partially computed set of results, scan them to
+    // determine what is dirty, seeding our initial DirtyBlocks worklist.
+    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+         I != E; ++I)
+      if (I->getResult().isDirty())
+        DirtyBlocks.push_back(I->getBB());
+
+    // Sort the cache so that we can do fast binary search lookups below.
+    std::sort(Cache.begin(), Cache.end());
+
+    ++NumCacheDirtyNonLocal;
+    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+    //     << Cache.size() << " cached: " << *QueryInst;
+  } else {
+    // Seed DirtyBlocks with each of the preds of QueryInst's block.
+    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+      DirtyBlocks.push_back(*PI);
+    ++NumUncacheNonLocal;
+  }
+
+  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+  SmallPtrSet<BasicBlock*, 64> Visited;
+
+  unsigned NumSortedEntries = Cache.size();
+  DEBUG(AssertSorted(Cache));
+
+  // Iterate while we still have blocks to update.
+  while (!DirtyBlocks.empty()) {
+    BasicBlock *DirtyBB = DirtyBlocks.back();
+    DirtyBlocks.pop_back();
+
+    // Already processed this block?
+    if (!Visited.insert(DirtyBB))
+      continue;
+
+    // Do a binary search to see if we already have an entry for this block in
+    // the cache set.  If so, find it.
+    DEBUG(AssertSorted(Cache, NumSortedEntries));
+    NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+                       NonLocalDepEntry(DirtyBB));
+    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+      --Entry;
+
+    NonLocalDepEntry *ExistingResult = 0;
+    if (Entry != Cache.begin()+NumSortedEntries &&
+        Entry->getBB() == DirtyBB) {
+      // If we already have an entry, and if it isn't already dirty, the block
+      // is done.
+      if (!Entry->getResult().isDirty())
+        continue;
+
+      // Otherwise, remember this slot so we can update the value.
+      ExistingResult = &*Entry;
+    }
+
+    // If the dirty entry has a pointer, start scanning from it so we don't
+    // have to rescan the entire block.
+    BasicBlock::iterator ScanPos = DirtyBB->end();
+    if (ExistingResult) {
+      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+        ScanPos = Inst;
+        // We're removing QueryInst's use of Inst.
+        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+                             QueryCS.getInstruction());
+      }
+    }
+
+    // Find out if this block has a local dependency for QueryInst.
+    MemDepResult Dep;
+
+    if (ScanPos != DirtyBB->begin()) {
+      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos,
+                                      DirtyBB);
+    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+      // No dependence found.  If this is the entry block of the function, it
+      // is a clobber, otherwise it is non-local.
+      Dep = MemDepResult::getNonLocal();
+    } else {
+      Dep = MemDepResult::getClobber(ScanPos);
+    }
+
+    // If we had a dirty entry for the block, update it.  Otherwise, just add
+    // a new entry.
+    if (ExistingResult)
+      ExistingResult->setResult(Dep);
+    else
+      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+    // If the block has a dependency (i.e. it isn't completely transparent to
+    // the value), remember the association!
+    if (!Dep.isNonLocal()) {
+      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+      // update this when we remove instructions.
+      if (Instruction *Inst = Dep.getInst())
+        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+    } else {
+
+      // If the block *is* completely transparent to the load, we need to
+      // check the predecessors of this block.  Add them to our worklist.
+      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+        DirtyBlocks.push_back(*PI);
+    }
+  }
+
+  return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
+                             SmallVectorImpl<NonLocalDepResult> &Result) {
+  assert(Pointer->getType()->isPointerTy() &&
+         "Can't get pointer deps of a non-pointer!");
+  Result.clear();
+
+  // We know that the pointer value is live into FromBB; find the def/clobbers
+  // from the predecessors.
+  const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
+  uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
+
+  PHITransAddr Address(Pointer, TD);
+
+  // This is the set of blocks we've inspected, and the pointer we consider in
+  // each block.  Because of critical edges, we currently bail out if querying
+  // a block with multiple different pointers.  This can happen during PHI
+  // translation.
+  DenseMap<BasicBlock*, Value*> Visited;
+  if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
+                                   Result, Visited, true))
+    return;
+  Result.clear();
+  Result.push_back(NonLocalDepResult(FromBB,
+                                     MemDepResult::getClobber(FromBB->begin()),
+                                     Pointer));
+}
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available).  If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+                        bool isLoad, BasicBlock *BB,
+                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+  // Do a binary search to see if we already have an entry for this block in
+  // the cache set.  If so, find it.
+  NonLocalDepInfo::iterator Entry =
+    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+                     NonLocalDepEntry(BB));
+  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+    --Entry;
+
+  NonLocalDepEntry *ExistingResult = 0;
+  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+    ExistingResult = &*Entry;
+
+  // If we have a cached entry, and it is non-dirty, use it as the value for
+  // this dependency.
+  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+    ++NumCacheNonLocalPtr;
+    return ExistingResult->getResult();
+  }
+
+  // Otherwise, we have to scan for the value.  If we have a dirty cache
+  // entry, start scanning from its position, otherwise we scan from the end
+  // of the block.
+  BasicBlock::iterator ScanPos = BB->end();
+  if (ExistingResult && ExistingResult->getResult().getInst()) {
+    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+           "Instruction invalidated?");
+    ++NumCacheDirtyNonLocalPtr;
+    ScanPos = ExistingResult->getResult().getInst();
+
+    // Eliminating the dirty entry from 'Cache', so update the reverse info.
+    ValueIsLoadPair CacheKey(Pointer, isLoad);
+    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+  } else {
+    ++NumUncacheNonLocalPtr;
+  }
+
+  // Scan the block for the dependency.
+  MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad,
+                                              ScanPos, BB);
+
+  // If we had a dirty entry for the block, update it.  Otherwise, just add
+  // a new entry.
+  if (ExistingResult)
+    ExistingResult->setResult(Dep);
+  else
+    Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+  // If the block has a dependency (i.e. it isn't completely transparent to
+  // the value), remember the reverse association because we just added it
+  // to Cache!
+  if (Dep.isNonLocal())
+    return Dep;
+
+  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+  // update MemDep when we remove instructions.
+  Instruction *Inst = Dep.getInst();
+  assert(Inst && "Didn't depend on anything?");
+  ValueIsLoadPair CacheKey(Pointer, isLoad);
+  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+  return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered.  This
+/// is optimized for the case when only a few entries have been added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+                         unsigned NumSortedEntries) {
+  switch (Cache.size() - NumSortedEntries) {
+  case 0:
+    // Done, no new entries.
+    break;
+  case 2: {
+    // Two new entries, insert the last one into place.
+    NonLocalDepEntry Val = Cache.back();
+    Cache.pop_back();
+    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+    Cache.insert(Entry, Val);
+    // FALL THROUGH.
+  }
+  case 1:
+    // One new entry, just insert the new value at the appropriate position.
+    if (Cache.size() != 1) {
+      NonLocalDepEntry Val = Cache.back();
+      Cache.pop_back();
+      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+        std::upper_bound(Cache.begin(), Cache.end(), Val);
+      Cache.insert(Entry, Val);
+    }
+    break;
+  default:
+    // Added many values, do a full scale sort.
+    std::sort(Cache.begin(), Cache.end());
+    break;
+  }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for first-block queries (when SkipFirstBlock
+/// is true).  In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason.  This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
+                            bool isLoad, BasicBlock *StartBB,
+                            SmallVectorImpl<NonLocalDepResult> &Result,
+                            DenseMap<BasicBlock*, Value*> &Visited,
+                            bool SkipFirstBlock) {
+
+  // Look up the cached info for Pointer.
+  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
+    &NonLocalPointerDeps[CacheKey];
+  NonLocalDepInfo *Cache = &CacheInfo->second;
+
+  // If we have valid cached information for exactly the block we are
+  // investigating, just return it with no recomputation.
+  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+    // We have a fully cached result for this query, so we can just return the
+    // cached results and populate the visited set.  However, we have to verify
+    // that we don't already have conflicting results for these blocks.  Check
+    // to ensure that if a block in the results set is also in the visited
+    // set, it was visited with the same pointer query.
+    if (!Visited.empty()) {
+      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+           I != E; ++I) {
+        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+        if (VI == Visited.end() || VI->second == Pointer.getAddr())
+          continue;
+
+        // We have a pointer mismatch in a block.  Just return clobber, saying
+        // that something was clobbered in this result.  We could also do a
+        // non-fully cached query, but there is little point in doing this.
+        return true;
+      }
+    }
+
+    Value *Addr = Pointer.getAddr();
+    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+         I != E; ++I) {
+      Visited.insert(std::make_pair(I->getBB(), Addr));
+      if (!I->getResult().isNonLocal())
+        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+    }
+    ++NumCacheCompleteNonLocalPtr;
+    return false;
+  }
+
+  // Otherwise, this is either a new block, a block with an invalid cache
+  // pointer, or one that we're about to invalidate by putting more info into
+  // it than its valid cache info holds.  If the cache is empty, the result
+  // will be valid cache info; otherwise it isn't.
+  if (Cache->empty())
+    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->first = BBSkipFirstBlockPair();
+
+  SmallVector<BasicBlock*, 32> Worklist;
+  Worklist.push_back(StartBB);
+
+  // Keep track of the entries that we know are sorted.  Previously cached
+  // entries will all be sorted.  The entries we add are only sorted on demand
+  // (we don't insert every element into its sorted position).  We know that
+  // we won't get any reuse from currently inserted values, because we don't
+  // revisit blocks after we insert info for them.
+  unsigned NumSortedEntries = Cache->size();
+  DEBUG(AssertSorted(*Cache));
+
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+
+    // Skip the first block if we have it.
+    if (!SkipFirstBlock) {
+      // Analyze the dependency of *Pointer in BB.  We should already have
+      // visited this block.
+      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
+
+      // Get the dependency info for Pointer in BB.  If we have cached
+      // information, we will use it, otherwise we compute it.
+      DEBUG(AssertSorted(*Cache, NumSortedEntries));
+      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
+                                                 isLoad, BB, Cache,
+                                                 NumSortedEntries);
+
+      // If we got a Def or Clobber, add this to the list of results.
+      if (!Dep.isNonLocal()) {
+        Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+        continue;
+      }
+    }
+
+    // If 'Pointer' is an instruction defined in this block, then we need to do
+    // phi translation to change it into a value live in the predecessor block.
+    // If not, we just add the predecessors to the worklist and scan them with
+    // the same Pointer.
+    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+      SkipFirstBlock = false;
+      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+        // Verify that we haven't looked at this block yet.
+        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
+          InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+        if (InsertRes.second) {
+          // First time we've looked at *PI.
+          Worklist.push_back(*PI);
+          continue;
+        }
+
+        // If we have seen this block before, but it was with a different
+        // pointer, then we have a phi translation failure and have to treat
+        // this as a clobber.
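+        // (Illustrative case: with %p = phi i8* [ %a, %bb1 ], [ %b, %bb2 ]
+        //  feeding the query, a shared predecessor can be reached once as %a
+        //  and once as %b; two different pointers for one block cannot be
+        //  represented in the per-block Visited map.)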
+ if (InsertRes.first->second != Pointer.getAddr()) + goto PredTranslationFailure; + } + continue; + } + + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. + if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } + Cache = 0; + + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr PredPointer(Pointer); + PredPointer.PHITranslateValue(BB, Pred, 0); + + Value *PredPtrVal = PredPointer.getAddr(); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); + + if (!InsertRes.second) { + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtrVal) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. + goto PredTranslationFailure; + } + + // If PHI translation was unable to find an available pointer in this + // predecessor, then we have to assume that the pointer is clobbered in + // that predecessor. We can still do PRE of the load, which would insert + // a computation of the pointer in this predecessor. + if (PredPtrVal == 0) { + // Add the entry to the Result list. + NonLocalDepResult Entry(Pred, + MemDepResult::getClobber(Pred->getTerminator()), + PredPtrVal); + Result.push_back(Entry); + + // Since we had a phi translation failure, the cache for CacheKey won't + // include all of the entries that we need to immediately satisfy future + // queries. Mark this in NonLocalPointerDeps by setting the + // BBSkipFirstBlockPair pointer to null. This requires reuse of the + // cached value to do more work but not miss the phi trans failure. + NonLocalPointerDeps[CacheKey].first = BBSkipFirstBlockPair(); + continue; + } + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If we have a problem phi translating, fall through to the code below + // to handle the failure condition. + if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred, + Result, Visited)) + goto PredTranslationFailure; + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. 
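+    // (The recursive getNonLocalPointerDepFromBB call above may have added
+    //  entries to NonLocalPointerDeps; a DenseMap grows by rehashing, which
+    //  relocates its entries, so pointers into it taken earlier can be stale.)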
+    CacheInfo = &NonLocalPointerDeps[CacheKey];
+    Cache = &CacheInfo->second;
+    NumSortedEntries = Cache->size();
+
+    // Since we did phi translation, the "Cache" set won't contain all of the
+    // results for the query.  This is ok (we can still use it to accelerate
+    // specific block queries) but we can't do the fastpath "return all
+    // results from the set".  Clear out the indicator for this.
+    CacheInfo->first = BBSkipFirstBlockPair();
+    SkipFirstBlock = false;
+    continue;
+
+  PredTranslationFailure:
+
+    if (Cache == 0) {
+      // Refresh the CacheInfo/Cache pointer if it got invalidated.
+      CacheInfo = &NonLocalPointerDeps[CacheKey];
+      Cache = &CacheInfo->second;
+      NumSortedEntries = Cache->size();
+    }
+
+    // Since we failed phi translation, the "Cache" set won't contain all of
+    // the results for the query.  This is ok (we can still use it to
+    // accelerate specific block queries) but we can't do the fastpath "return
+    // all results from the set".  Clear out the indicator for this.
+    CacheInfo->first = BBSkipFirstBlockPair();
+
+    // If *nothing* works, mark the pointer as being clobbered by the first
+    // instruction in this block.
+    //
+    // If this is the magic first block, return this as a clobber of the whole
+    // incoming value.  Since we can't phi translate to one of the
+    // predecessors, we have to bail out.
+    if (SkipFirstBlock)
+      return true;
+
+    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+      assert(I != Cache->rend() && "Didn't find current block??");
+      if (I->getBB() != BB)
+        continue;
+
+      assert(I->getResult().isNonLocal() &&
+             "Should only be here with transparent block");
+      I->setResult(MemDepResult::getClobber(BB->begin()));
+      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+      Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+                                         Pointer.getAddr()));
+      break;
+    }
+  }
+
+  // Okay, we're done now.  If we added new values to the cache, re-sort it.
+  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+  DEBUG(AssertSorted(*Cache));
+  return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+  CachedNonLocalPointerInfo::iterator It =
+    NonLocalPointerDeps.find(P);
+  if (It == NonLocalPointerDeps.end()) return;
+
+  // Remove all of the entries in the BB->val map.  This involves removing
+  // instructions from the reverse map.
+  NonLocalDepInfo &PInfo = It->second.second;
+
+  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+    Instruction *Target = PInfo[i].getResult().getInst();
+    if (Target == 0) continue;  // Ignore non-local dep results.
+    assert(Target->getParent() == PInfo[i].getBB());
+
+    // Eliminating the dirty entry from 'Cache', so update the reverse info.
+    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+  }
+
+  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+  NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep.  This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with Ptr.  This can make Ptr available
+/// in more places than the cached info would otherwise allow.
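+///
+/// A sketch of the intended call pattern (hypothetical client code):
+///
+///   // after proving A == B and rewriting all uses of B to A:
+///   B->replaceAllUsesWith(A);
+///   MD.invalidateCachedPointerInfo(A);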
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { + // If Ptr isn't really a pointer, just ignore it. + if (!Ptr->getType()->isPointerTy()) return; + // Flush store info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); + // Flush load info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); +} + +/// invalidateCachedPredecessors - Clear the PredIteratorCache info. +/// This needs to be done when the CFG changes, e.g., due to splitting +/// critical edges. +void MemoryDependenceAnalysis::invalidateCachedPredecessors() { + PredCache->clear(); +} + +/// removeInstruction - Remove an instruction from the dependence analysis, +/// updating the dependence of instructions that previously depended on it. +/// This method attempts to keep the cache coherent using the reverse map. +void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { + // Walk through the Non-local dependencies, removing this one as the value + // for any cached queries. + NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); + if (NLDI != NonLocalDeps.end()) { + NonLocalDepInfo &BlockMap = NLDI->second.first; + for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); + DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); + NonLocalDeps.erase(NLDI); + } + + // If we have a cached local dependence query for this instruction, remove it. + // + LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); + if (LocalDepEntry != LocalDeps.end()) { + // Remove us from DepInst's reverse set now that the local dep info is gone. + if (Instruction *Inst = LocalDepEntry->second.getInst()) + RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); + + // Remove this local dependency info. + LocalDeps.erase(LocalDepEntry); + } + + // If we have any cached pointer dependencies on this instruction, remove + // them. If the instruction has non-pointer type, then it can't be a pointer + // base. + + // Remove it from both the load info and the store info. The instruction + // can't be in either of these maps if it is non-pointer. + if (RemInst->getType()->isPointerTy()) { + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } + + // Loop over all of the things that depend on the instruction we're removing. + // + SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; + + // If we find RemInst as a clobber or Def in any of the maps for other values, + // we need to replace its entry with a dirty version of the instruction after + // it. If RemInst is a terminator, we use a null dirty value. + // + // Using a dirty version of the instruction after RemInst saves having to scan + // the entire block to get to this point. + MemDepResult NewDirtyVal; + if (!RemInst->isTerminator()) + NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; + // RemInst can't be the terminator if it has local stuff depending on it. 
+ assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && + "Nothing can locally depend on a terminator"); + + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), + E = ReverseDeps.end(); I != E; ++I) { + Instruction *InstDependingOnRemInst = *I; + assert(InstDependingOnRemInst != RemInst && + "Already removed our local dep info"); + + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; + + // Make sure to remember that new things depend on NewDepInst. + assert(NewDirtyVal.getInst() && "There is no way something else can have " + "a local dep on this if it is a terminator!"); + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + InstDependingOnRemInst)); + } + + ReverseLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating the + // 'ReverseDeps' reference. + while (!ReverseDepsToAdd.empty()) { + ReverseLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseNonLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; + for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); + I != E; ++I) { + assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + + PerInstNLInfo &INLD = NonLocalDeps[*I]; + // The information is now dirty! + INLD.second = true; + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NextI = NewDirtyVal.getInst()) + ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + } + } + + ReverseNonLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating 'Set' + while (!ReverseDepsToAdd.empty()) { + ReverseNonLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a + // value in the NonLocalPointerDeps info. + ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = + ReverseNonLocalPtrDeps.find(RemInst); + if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { + SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; + SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; + + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + ValueIsLoadPair P = *I; + assert(P.getPointer() != RemInst && + "Already removed NonLocalPointerDeps info for RemInst"); + + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second; + + // The cache is not valid for any specific block anymore. + NonLocalPointerDeps[P].first = BBSkipFirstBlockPair(); + + // Update any entries for RemInst to use the instruction after it. + for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); + DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) + ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); + } + + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its + // subsequent value may invalidate the sortedness. 
+ std::sort(NLPDI.begin(), NLPDI.end()); + } + + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); + + while (!ReversePtrDepsToAdd.empty()) { + ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] + .insert(ReversePtrDepsToAdd.back().second); + ReversePtrDepsToAdd.pop_back(); + } + } + + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); + AA->deleteValue(RemInst); + DEBUG(verifyRemoved(RemInst)); +} +/// verifyRemoved - Verify that the specified instruction does not occur +/// in our internal data structures. +void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { + for (LocalDepMapType::const_iterator I = LocalDeps.begin(), + E = LocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + assert(I->second.getInst() != D && + "Inst occurs in data structures"); + } + + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), + E = NonLocalPointerDeps.end(); I != E; ++I) { + assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); + const NonLocalDepInfo &Val = I->second.second; + for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); + II != E; ++II) + assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); + } + + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), + E = NonLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + const PerInstNLInfo &INLD = I->second; + for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), + EE = INLD.first.end(); II != EE; ++II) + assert(II->getResult().getInst() != D && "Inst occurs in data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), + E = ReverseLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), + E = ReverseNonLocalDeps.end(); + I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in data structures"); + } + + for (ReverseNonLocalPtrDepTy::const_iterator + I = ReverseNonLocalPtrDeps.begin(), + E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in rev NLPD map"); + + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), + E = I->second.end(); II != E; ++II) + assert(*II != ValueIsLoadPair(D, false) && + *II != ValueIsLoadPair(D, true) && + "Inst occurs in ReverseNonLocalPtrDeps map"); + } + +} diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp new file mode 100644 index 0000000..2cc1c2a --- /dev/null +++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -0,0 +1,85 @@ +//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass decodes the debug info metadata in a module and prints in a +// (sufficiently-prepared-) human-readable form. 
+// +// For example, run this pass from opt along with the -analyze option, and +// it'll print to standard output. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +namespace { + class ModuleDebugInfoPrinter : public ModulePass { + DebugInfoFinder Finder; + public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoPrinter() : ModulePass(ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const; + }; +} + +char ModuleDebugInfoPrinter::ID = 0; +INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", + "Decodes module-level debug info", false, true); + +ModulePass *llvm::createModuleDebugInfoPrinterPass() { + return new ModuleDebugInfoPrinter(); +} + +bool ModuleDebugInfoPrinter::runOnModule(Module &M) { + Finder.processModule(M); + return false; +} + +void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) { + O << "Compile Unit: "; + DICompileUnit(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) { + O << "Subprogram: "; + DISubprogram(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) { + O << "GlobalVariable: "; + DIGlobalVariable(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) { + O << "Type: "; + DIType(*I).print(O); + O << '\n'; + } +} diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 0000000..8e4fa03 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,429 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. 
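+//
+// For example (an illustrative case, not taken from the code below): given
+//   %a = getelementptr i8* %p, i64 4
+// in a block BB where %p = phi i8* [ %q, %pred ], [ %r, %other ], translating
+// %a across the edge pred->BB rewrites it in terms of pred's values as
+// "getelementptr i8* %q, i64 4".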
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static bool CanPHITrans(Instruction *Inst) { + if (isa<PHINode>(Inst) || + isa<BitCastInst>(Inst) || + isa<GetElementPtrInst>(Inst)) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) + return true; + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +void PHITransAddr::dump() const { + if (Addr == 0) { + dbgs() << "PHITransAddr: null\n"; + return; + } + dbgs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} + + +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl<Instruction*> &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast<Instruction>(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in Tmp or its operands recursively + // are. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr, either " + "something is missing from InstInputs or CanPHITrans is wrong:\n"; + errs() << *I << '\n'; + return false; + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + if (!Tmp.empty()) { + errs() << "PHITransAddr inconsistent, contains extra instructions:\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + return false; + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + Instruction *Inst = dyn_cast<Instruction>(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl<Instruction*> &InstInputs) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return; + + // If the instruction is in the InstInputs list, remove it. 
+  SmallVectorImpl<Instruction*>::iterator Entry =
+    std::find(InstInputs.begin(), InstInputs.end(), I);
+  if (Entry != InstInputs.end()) {
+    InstInputs.erase(Entry);
+    return;
+  }
+
+  assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+  // Otherwise, it must have instruction inputs itself.  Zap them recursively.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+      RemoveInstInputs(Op, InstInputs);
+  }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+                                         BasicBlock *PredBB,
+                                         const DominatorTree *DT) {
+  // If this is a non-instruction value, it can't require PHI translation.
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (Inst == 0) return V;
+
+  // Determine whether 'Inst' is an input to our PHI translatable expression.
+  bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+  // Handle input instructions if needed.
+  if (isInput) {
+    if (Inst->getParent() != CurBB) {
+      // If it is an input defined in a different block, then it remains an
+      // input.
+      return Inst;
+    }
+
+    // If 'Inst' is defined in this block and is an input that needs to be phi
+    // translated, we need to incorporate the value into the expression or fail.
+
+    // In either case, the instruction itself isn't an input any longer.
+    InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+    // If this is a PHI, go ahead and translate it.
+    if (PHINode *PN = dyn_cast<PHINode>(Inst))
+      return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+    // If this is a non-phi value, and it is analyzable, we can incorporate it
+    // into the expression by making all instruction operands be inputs.
+    if (!CanPHITrans(Inst))
+      return 0;
+
+    // All instruction operands are now inputs (and of course, they may also be
+    // defined in this block, so they may need to be phi translated themselves).
+    for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+      if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+        InstInputs.push_back(Op);
+  }
+
+  // Ok, it must be an intermediate result (either because it started that way
+  // or because we just incorporated it into the expression).  See if its
+  // operands need to be phi translated, and if so, reconstruct it.
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+    Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT);
+    if (PHIIn == 0) return 0;
+    if (PHIIn == BC->getOperand(0))
+      return BC;
+
+    // Find an available version of this cast.
+
+    // Constants are trivial to find.
+    if (Constant *C = dyn_cast<Constant>(PHIIn))
+      return AddAsInput(ConstantExpr::getBitCast(C, BC->getType()));
+
+    // Otherwise we have to see if a bitcasted version of the incoming pointer
+    // is available.  If so, we can use it, otherwise we have to fail.
+    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+         UI != E; ++UI) {
+      if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
+        if (BCI->getType() == BC->getType() &&
+            (!DT || DT->dominates(BCI->getParent(), PredBB)))
+          return BCI;
+    }
+    return 0;
+  }
+
+  // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); + bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); + bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to Pred, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. 
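+///
+/// A hypothetical caller might look like this (sketch only):
+///
+///   PHITransAddr TransAddr(Ptr, TD);
+///   if (!TransAddr.PHITranslateValue(BB, Pred, DT))   // false == success
+///     Ptr = TransAddr.getAddr();                      // now valid in Pred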
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+                                     const DominatorTree *DT) {
+  assert(Verify() && "Invalid PHITransAddr!");
+  Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+  assert(Verify() && "Invalid PHITransAddr!");
+
+  if (DT) {
+    // Make sure the value is live in the predecessor.
+    if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+      if (!DT->dominates(Inst->getParent(), PredBB))
+        Addr = 0;
+  }
+
+  return Addr == 0;
+}
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list.  This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+                          const DominatorTree &DT,
+                          SmallVectorImpl<Instruction*> &NewInsts) {
+  unsigned NISize = NewInsts.size();
+
+  // Attempt to PHI translate with insertion.
+  Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+  // If successful, return the new value.
+  if (Addr) return Addr;
+
+  // If not, destroy any intermediate instructions inserted.
+  while (NewInsts.size() != NISize)
+    NewInsts.pop_back_val()->eraseFromParent();
+  return 0;
+}
+
+
+/// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated
+/// version of 'InVal' for the edge PredBB->CurBB into the end of the PredBB
+/// block.  All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+                           BasicBlock *PredBB, const DominatorTree &DT,
+                           SmallVectorImpl<Instruction*> &NewInsts) {
+  // See if we have a version of this value already available and dominating
+  // PredBB.  If so, there is no need to insert a new instance of it.
+  PHITransAddr Tmp(InVal, TD);
+  if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+    return Tmp.getAddr();
+
+  // If we don't have an available version of this value, it must be an
+  // instruction.
+  Instruction *Inst = cast<Instruction>(InVal);
+
+  // Handle bitcast of PHI translatable value.
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+    Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0),
+                                              CurBB, PredBB, DT, NewInsts);
+    if (OpVal == 0) return 0;
+
+    // Otherwise insert a bitcast at the end of PredBB.
+    BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
+                                       InVal->getName()+".phi.trans.insert",
+                                       PredBB->getTerminator());
+    NewInsts.push_back(New);
+    return New;
+  }
+
+  // Handle getelementptr with at least one PHI operand.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+    SmallVector<Value*, 8> GEPOps;
+    BasicBlock *CurBB = GEP->getParent();
+    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+      Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+                                                CurBB, PredBB, DT, NewInsts);
+      if (OpVal == 0) return 0;
+      GEPOps.push_back(OpVal);
+    }
+
+    GetElementPtrInst *Result =
+      GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+                                InVal->getName()+".phi.trans.insert",
+                                PredBB->getTerminator());
+    Result->setIsInBounds(GEP->isInBounds());
+    NewInsts.push_back(Result);
+    return Result;
+  }
+
+#if 0
+  // FIXME: This code works, but it is unclear that we actually want to insert
+  // a big chain of computation in order to make a value available in a block.
+  // This needs to be evaluated carefully to consider its cost trade-offs.
+ + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/contrib/llvm/lib/Analysis/PointerTracking.cpp b/contrib/llvm/lib/Analysis/PointerTracking.cpp new file mode 100644 index 0000000..07f4682 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PointerTracking.cpp @@ -0,0 +1,316 @@ +//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/PointerTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Value.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +char PointerTracking::ID = 0; +PointerTracking::PointerTracking() : FunctionPass(ID) {} + +bool PointerTracking::runOnFunction(Function &F) { + predCache.clear(); + assert(analyzing.empty()); + FF = &F; + TD = getAnalysisIfAvailable<TargetData>(); + SE = &getAnalysis<ScalarEvolution>(); + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool PointerTracking::doInitialization(Module &M) { + const Type *PTy = Type::getInt8PtrTy(M.getContext()); + + // Find calloc(i64, i64) or calloc(i32, i32). + callocFunc = M.getFunction("calloc"); + if (callocFunc) { + const FunctionType *Ty = callocFunc->getFunctionType(); + + std::vector<const Type*> args, args2; + args.push_back(Type::getInt64Ty(M.getContext())); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + const FunctionType *Calloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Calloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Calloc1Type && Ty != Calloc2Type) + callocFunc = 0; // Give up + } + + // Find realloc(i8*, i64) or realloc(i8*, i32). 
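+  // (The two signatures correspond to 32- and 64-bit targets, where size_t
+  //  lowers to i32 or i64 respectively; the calloc lookup above accepts the
+  //  same two variants for the same reason.)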
+ reallocFunc = M.getFunction("realloc"); + if (reallocFunc) { + const FunctionType *Ty = reallocFunc->getFunctionType(); + std::vector<const Type*> args, args2; + args.push_back(PTy); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(PTy); + args2.push_back(Type::getInt32Ty(M.getContext())); + + const FunctionType *Realloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Realloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Realloc1Type && Ty != Realloc2Type) + reallocFunc = 0; // Give up + } + return false; +} + +// Calculates the number of elements allocated for pointer P, +// the type of the element is stored in Ty. +const SCEV *PointerTracking::computeAllocationCount(Value *P, + const Type *&Ty) const { + Value *V = P->stripPointerCasts(); + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + Value *arraySize = AI->getArraySize(); + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (CallInst *CI = extractMallocCall(V)) { + Value *arraySize = getMallocArraySize(CI, TD); + const Type* AllocTy = getMallocAllocatedType(CI); + if (!AllocTy || !arraySize) return SE->getCouldNotCompute(); + Ty = AllocTy; + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + Ty = ATy->getElementType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), + ATy->getNumElements()); + } + } + Ty = GV->getType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), 1); + //TODO: implement more tracking for globals + } + + if (CallInst *CI = dyn_cast<CallInst>(V)) { + CallSite CS(CI); + Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + const Loop *L = LI->getLoopFor(CI->getParent()); + if (F == callocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // calloc allocates arg0*arg1 bytes. + return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)), + SE->getSCEV(CS.getArgument(1))), + L); + } else if (F == reallocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // realloc allocates arg1 bytes. + return SE->getSCEVAtScope(CS.getArgument(1), L); + } + } + + return SE->getCouldNotCompute(); +} + +Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const +{ + Value *V = P->stripPointerCasts(); + if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return AI->getArraySize(); + } + + if (CallInst *CI = extractMallocCall(V)) { + Ty = getMallocAllocatedType(CI); + if (!Ty) + return 0; + Value *arraySize = getMallocArraySize(CI, TD); + if (!arraySize) { + Ty = Type::getInt8Ty(P->getContext()); + return CI->getArgOperand(0); + } + // arraySize elements of type Ty. 
+    return arraySize;
+  }
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+    if (GV->hasDefinitiveInitializer()) {
+      Constant *C = GV->getInitializer();
+      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+        Ty = ATy->getElementType();
+        return ConstantInt::get(Type::getInt32Ty(P->getContext()),
+                                ATy->getNumElements());
+      }
+    }
+    Ty = cast<PointerType>(GV->getType())->getElementType();
+    return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1);
+    //TODO: implement more tracking for globals
+  }
+
+  if (CallInst *CI = dyn_cast<CallInst>(V)) {
+    CallSite CS(CI);
+    Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+    if (F == reallocFunc) {
+      Ty = Type::getInt8Ty(P->getContext());
+      // realloc allocates arg1 bytes.
+      return CS.getArgument(1);
+    }
+  }
+
+  return 0;
+}
+
+// Calculates the number of elements of type Ty allocated for P.
+const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
+                                                           const Type *Ty)
+  const {
+  const Type *elementTy;
+  const SCEV *Count = computeAllocationCount(P, elementTy);
+  if (isa<SCEVCouldNotCompute>(Count))
+    return Count;
+  if (elementTy == Ty)
+    return Count;
+
+  if (!TD) // need TargetData from this point forward
+    return SE->getCouldNotCompute();
+
+  uint64_t elementSize = TD->getTypeAllocSize(elementTy);
+  uint64_t wantSize = TD->getTypeAllocSize(Ty);
+  if (elementSize == wantSize)
+    return Count;
+  if (elementSize % wantSize) // fractional counts not possible
+    return SE->getCouldNotCompute();
+  return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
+                                               elementSize/wantSize));
+}
+
+const SCEV *PointerTracking::getAllocationElementCount(Value *V) const {
+  // We only deal with pointers.
+  const PointerType *PTy = cast<PointerType>(V->getType());
+  return computeAllocationCountForType(V, PTy->getElementType());
+}
+
+const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
+  return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
+}
+
+// isLoopGuardedBy - Check the predicate directly, and also in its swapped
+// and inverted forms.
+enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
+                                                   Predicate Pred,
+                                                   const SCEV *A,
+                                                   const SCEV *B) const {
+  if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
+    return AlwaysTrue;
+  Pred = ICmpInst::getSwappedPredicate(Pred);
+  if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
+    return AlwaysTrue;
+
+  Pred = ICmpInst::getInversePredicate(Pred);
+  if (SE->isLoopEntryGuardedByCond(L, Pred, B, A))
+    return AlwaysFalse;
+  Pred = ICmpInst::getSwappedPredicate(Pred);
+  if (SE->isLoopEntryGuardedByCond(L, Pred, A, B))
+    return AlwaysFalse;
+  return Unknown;
+}
+
+enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
+                                               const SCEV *Limit,
+                                               BasicBlock *BB)
+{
+  //FIXME: merge implementation
+  return Unknown;
+}
+
+void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
+                                       const SCEV *&Limit,
+                                       const SCEV *&Offset) const
+{
+  Pointer = Pointer->stripPointerCasts();
+  Base = Pointer->getUnderlyingObject();
+  Limit = getAllocationSizeInBytes(Base);
+  if (isa<SCEVCouldNotCompute>(Limit)) {
+    Base = 0;
+    Offset = Limit;
+    return;
+  }
+
+  Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
+  if (isa<SCEVCouldNotCompute>(Offset)) {
+    Base = 0;
+    Limit = Offset;
+  }
+}
+
+void PointerTracking::print(raw_ostream &OS, const Module* M) const {
+  // Calling some PT methods may cause caches to be updated; however,
+  // this should be safe for the same reason it's safe for SCEV.
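+  // (Hence the const_cast just below: print() is const, but checkLimits()
+  //  may lazily fill internal caches, mirroring how ScalarEvolution treats
+  //  its own caches.)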
+ PointerTracking &PT = *const_cast<PointerTracking*>(this); + for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { + if (!I->getType()->isPointerTy()) + continue; + Value *Base; + const SCEV *Limit, *Offset; + getPointerOffset(&*I, Base, Limit, Offset); + if (!Base) + continue; + + if (Base == &*I) { + const SCEV *S = getAllocationElementCount(Base); + OS << *Base << " ==> " << *S << " elements, "; + OS << *Limit << " bytes allocated\n"; + continue; + } + OS << &*I << " -- base: " << *Base; + OS << " offset: " << *Offset; + + enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent()); + switch (res) { + case AlwaysTrue: + OS << " always safe\n"; + break; + case AlwaysFalse: + OS << " always unsafe\n"; + break; + case Unknown: + OS << " <<unknown>>\n"; + break; + } + } +} + +INITIALIZE_PASS(PointerTracking, "pointertracking", + "Track pointer bounds", false, true); diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp new file mode 100644 index 0000000..cbe8d18 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -0,0 +1,98 @@ +//===- PostDominators.cpp - Post-Dominator Calculation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the post-dominator construction algorithms. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "postdomtree" + +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Instructions.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/Analysis/DominatorInternals.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// PostDominatorTree Implementation +//===----------------------------------------------------------------------===// + +char PostDominatorTree::ID = 0; +char PostDominanceFrontier::ID = 0; +INITIALIZE_PASS(PostDominatorTree, "postdomtree", + "Post-Dominator Tree Construction", true, true); + +bool PostDominatorTree::runOnFunction(Function &F) { + DT->recalculate(F); + return false; +} + +PostDominatorTree::~PostDominatorTree() { + delete DT; +} + +void PostDominatorTree::print(raw_ostream &OS, const Module *) const { + DT->print(OS); +} + + +FunctionPass* llvm::createPostDomTree() { + return new PostDominatorTree(); +} + +//===----------------------------------------------------------------------===// +// PostDominanceFrontier Implementation +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier", + "Post-Dominance Frontier Construction", true, true); + +const DominanceFrontier::DomSetType & +PostDominanceFrontier::calculate(const PostDominatorTree &DT, + const DomTreeNode *Node) { + // Loop over CFG successors to calculate DFlocal[Node] + BasicBlock *BB = Node->getBlock(); + DomSetType &S = Frontiers[BB]; // The new set to fill in... + if (getRoots().empty()) return S; + + if (BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); + SI != SE; ++SI) { + BasicBlock *P = *SI; + // Does Node immediately dominate this predecessor? 
+ DomTreeNode *SINode = DT[P]; + if (SINode && SINode->getIDom() != Node) + S.insert(P); + } + + // At this point, S is DFlocal. Now we union in DFup's of our children... + // Loop through and visit the nodes that Node immediately dominates (Node's + // children in the IDomTree) + // + for (DomTreeNode::const_iterator + NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) { + DomTreeNode *IDominee = *NI; + const DomSetType &ChildDF = calculate(DT, IDominee); + + DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end(); + for (; CDFI != CDFE; ++CDFI) { + if (!DT.properlyDominates(Node, DT[*CDFI])) + S.insert(*CDFI); + } + } + + return S; +} + +FunctionPass* llvm::createPostDomFrontier() { + return new PostDominanceFrontier(); +} diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp new file mode 100644 index 0000000..ecc0a18 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp @@ -0,0 +1,421 @@ +//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// estimates the profiling information in a very crude and unimaginative way. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-estimator" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +using namespace llvm; + +static cl::opt<double> +LoopWeight( + "profile-estimator-loop-weight", cl::init(10), + cl::value_desc("loop-weight"), + cl::desc("Number of loop executions used for profile-estimator") +); + +namespace { + class ProfileEstimatorPass : public FunctionPass, public ProfileInfo { + double ExecCount; + LoopInfo *LI; + std::set<BasicBlock*> BBToVisit; + std::map<Loop*,double> LoopExitWeights; + std::map<Edge,double> MinimalWeight; + public: + static char ID; // Class identification, replacement for typeinfo + explicit ProfileEstimatorPass(const double execcount = 0) + : FunctionPass(ID), ExecCount(execcount) { + if (execcount == 0) ExecCount = LoopWeight; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + } + + virtual const char *getPassName() const { + return "Profiling information estimator"; + } + + /// run - Estimate the profile information from the specified file. + virtual bool runOnFunction(Function &F); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. 
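+    /// (For example, a query for &ProfileInfo::ID must return the
+    /// ProfileInfo sub-object, whose address can differ from 'this' under
+    /// multiple inheritance.)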
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &ProfileInfo::ID)
+        return (ProfileInfo*)this;
+      return this;
+    }
+
+    virtual void recurseBasicBlock(BasicBlock *BB);
+
+    void inline printEdgeWeight(Edge);
+  };
+}  // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+                   "Estimate profiling information", false, true, false);
+
+namespace llvm {
+  char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+  FunctionPass *createProfileEstimatorPass() {
+    return new ProfileEstimatorPass();
+  }
+
+  /// createProfileEstimatorPass - This function returns a Pass that estimates
+  /// profiling information using the given loop execution count.
+  Pass *createProfileEstimatorPass(const unsigned execcount) {
+    return new ProfileEstimatorPass(execcount);
+  }
+}
+
+static double ignoreMissing(double w) {
+  if (w == ProfileInfo::MissingValue) return 0;
+  return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+  DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+  DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+               << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+  // Break the recursion if this BasicBlock was already visited.
+  if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+  // Read the LoopInfo for this block.
+  bool BBisHeader = LI->isLoopHeader(BB);
+  Loop* BBLoop = LI->getLoopFor(BB);
+
+  // To get the block weight, read all incoming edges.
+  double BBWeight = 0;
+  std::set<BasicBlock*> ProcessedPreds;
+  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+        bbi != bbe; ++bbi ) {
+    // If this block was not considered already, add weight.
+    Edge edge = getEdge(*bbi,BB);
+    double w = getEdgeWeight(edge);
+    if (ProcessedPreds.insert(*bbi).second) {
+      BBWeight += ignoreMissing(w);
+    }
+    // If this block is a loop header and the predecessor is contained in
+    // this loop, the edge is a backedge: continue and do not check if the
+    // value is valid.
+    if (BBisHeader && BBLoop->contains(*bbi)) {
+      printEdgeError(edge, "but is backedge, continuing");
+      continue;
+    }
+    // If the edge's value is missing (and this is no loop header, and this
+    // is no backedge) return; this block is currently not estimable.
+    if (w == MissingValue) {
+      printEdgeError(edge, "returning");
+      return;
+    }
+  }
+  if (getExecutionCount(BB) != MissingValue) {
+    BBWeight = getExecutionCount(BB);
+  }
+
+  // Fetch all necessary information for current block.
+  SmallVector<Edge, 8> ExitEdges;
+  SmallVector<Edge, 8> Edges;
+  if (BBLoop) {
+    BBLoop->getExitEdges(ExitEdges);
+  }
+
+  // If this is a loop header, consider the following:
+  // Exactly the flow that is entering this block must exit this block too. So
+  // do the following:
+  // *) Get all the exit edges and read the flow that is already leaving this
+  //    loop; remember the edges that do not have any flow on them right now.
+  //    (The edges that already have flow on them are most likely exiting
+  //    edges of other loops; do not touch those flows because the previously
+  //    calculated loop headers would not be exact anymore.)
+  // *) In case there is not a single exiting edge left, create one at the
+  //    loop latch to prevent the flow from building up in the loop.
+  // *) Take the flow that is not leaving the loop already and distribute it
+  //    on the remaining exiting edges.
+  //    (This ensures that all flow that enters the loop also leaves it.)
+  // *) Increase the flow into the loop by increasing the weight of this
+  //    block. There is at least one incoming backedge that will bring us
+  //    this flow later on. (So that the flow condition in this node is valid
+  //    again.)
+  if (BBisHeader) {
+    double incoming = BBWeight;
+    // Subtract the flow leaving the loop.
+    std::set<Edge> ProcessedExits;
+    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+         ee = ExitEdges.end(); ei != ee; ++ei) {
+      if (ProcessedExits.insert(*ei).second) {
+        double w = getEdgeWeight(*ei);
+        if (w == MissingValue) {
+          Edges.push_back(*ei);
+          // Check if there is a necessary minimal weight; if yes, subtract
+          // it from the incoming weight.
+          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+            incoming -= MinimalWeight[*ei];
+            DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei])
+                         << " at " << (*ei) << "\n");
+          }
+        } else {
+          incoming -= w;
+        }
+      }
+    }
+    // If no exit edges, create one:
+    if (Edges.size() == 0) {
+      BasicBlock *Latch = BBLoop->getLoopLatch();
+      if (Latch) {
+        Edge edge = getEdge(Latch,0);
+        EdgeInformation[BB->getParent()][edge] = BBWeight;
+        printEdgeWeight(edge);
+        edge = getEdge(Latch, BB);
+        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+        printEdgeWeight(edge);
+      }
+    }
+
+    // Distribute the remaining weight to the exiting edges. To prevent
+    // fractions from building up and provoking precision problems, the
+    // weight to be distributed is split and rounded; the last edge gets a
+    // somewhat bigger value, but we are close enough for an estimation.
+    double fraction = floor(incoming/Edges.size());
+    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+         ei != ee; ++ei) {
+      double w = 0;
+      if (ei != (ee-1)) {
+        w = fraction;
+        incoming -= fraction;
+      } else {
+        w = incoming;
+      }
+      EdgeInformation[BB->getParent()][*ei] += w;
+      // Re-add the necessary minimal weight.
+      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+        DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei])
+                     << " at " << (*ei) << "\n");
+      }
+      printEdgeWeight(*ei);
+
+      // Add minimal weight to the paths to all exit edges; this is used to
+      // ensure that enough flow reaches these edges.
+      Path p;
+      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+      while (Dest != BB) {
+        const BasicBlock *Parent = p.find(Dest)->second;
+        Edge e = getEdge(Parent, Dest);
+        if (MinimalWeight.find(e) == MinimalWeight.end()) {
+          MinimalWeight[e] = 0;
+        }
+        MinimalWeight[e] += w;
+        DEBUG(dbgs() << "Minimal Weight for " << e << ": "
+                     << format("%.20g",MinimalWeight[e]) << "\n");
+        Dest = Parent;
+      }
+    }
+    // Increase flow into the loop.
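+    // For example, with the default ExecCount of 10 a header entered with
+    // weight w is scaled to w * 11: w eventually leaves through the exit
+    // edges assigned above, so w * 10 is left to circulate through the
+    // backedge(s), and the flow condition still holds.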
+    BBWeight *= (ExecCount+1);
+  }
+
+  BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop exiting edges; now we have a
+  // definite block weight and must distribute it onto the outgoing edges.
+  // Since there may already be flow attached to some of the edges, read this
+  // flow first and remember the edges that still have no flow attached.
+  Edges.clear();
+  std::set<BasicBlock*> ProcessedSuccs;
+
+  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // Also check for (BB,0) edges that may already contain some flow. (But
+  // only in case there are no successors.)
+  if (bbi == bbe) {
+    Edge edge = getEdge(BB,0);
+    EdgeInformation[BB->getParent()][edge] = BBWeight;
+    printEdgeWeight(edge);
+  }
+  for ( ; bbi != bbe; ++bbi ) {
+    if (ProcessedSuccs.insert(*bbi).second) {
+      Edge edge = getEdge(BB,*bbi);
+      double w = getEdgeWeight(edge);
+      if (w != MissingValue) {
+        BBWeight -= getEdgeWeight(edge);
+      } else {
+        Edges.push_back(edge);
+        // If a minimal weight is necessary, reserve weight by subtracting it
+        // from the block weight; it is re-added later on.
+        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+          BBWeight -= MinimalWeight[edge];
+          DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge])
+                       << " at " << edge << "\n");
+        }
+      }
+    }
+  }
+
+  double fraction = floor(BBWeight/Edges.size());
+  // Finally we know what flow is still not leaving the block; distribute
+  // this flow onto the empty edges.
+  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+       ei != ee; ++ei) {
+    if (ei != (ee-1)) {
+      EdgeInformation[BB->getParent()][*ei] += fraction;
+      BBWeight -= fraction;
+    } else {
+      EdgeInformation[BB->getParent()][*ei] += BBWeight;
+    }
+    // Re-add the minimal necessary weight.
+    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+      DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei])
+                   << " at " << (*ei) << "\n");
+    }
+    printEdgeWeight(*ei);
+  }
+
+  // This block is visited; mark this before the recursion.
+  BBToVisit.erase(BB);
+
+  // Recurse into successors.
+  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+  if (F.isDeclaration()) return false;
+
+  // Fetch LoopInfo and clear ProfileInfo for this function.
+  LI = &getAnalysis<LoopInfo>();
+  FunctionInformation.erase(&F);
+  BlockInformation[&F].clear();
+  EdgeInformation[&F].clear();
+
+  // Mark all blocks as to visit.
+  for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+    BBToVisit.insert(bi);
+
+  // Clear Minimal Edges.
+  MinimalWeight.clear();
+
+  DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+
+  // Since the entry block is the first one and has no predecessors, the edge
+  // (0,entry) is inserted with a starting weight of 2^32.
+  BasicBlock *entry = &F.getEntryBlock();
+  BlockInformation[&F][entry] = pow(2.0, 32.0);
+  Edge edge = getEdge(0,entry);
+  EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+  printEdgeWeight(edge);
+
+  // Since recurseBasicBlock() may return with a block which was not fully
+  // estimated, use recurseBasicBlock() until everything is calculated.
+  bool cleanup = false;
+  recurseBasicBlock(entry);
+  while (BBToVisit.size() > 0 && !cleanup) {
+    // Remember the number of open blocks; this is later used to check if
+    // progress was made.
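+    // This while loop is a fixpoint iteration: every round must retire at
+    // least one block, otherwise an edge weight is assumed below to break
+    // the stall, and as a last resort 'cleanup' zeroes the whole function.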
+    unsigned size = BBToVisit.size();
+
+    // Try to calculate all blocks in turn.
+    for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+         be = BBToVisit.end(); bi != be; ++bi) {
+      recurseBasicBlock(*bi);
+      // If at least one block was finished, break because the iterator may
+      // be invalid.
+      if (BBToVisit.size() < size) break;
+    }
+
+    // If not a single block was resolved, make some assumptions.
+    if (BBToVisit.size() == size) {
+      bool found = false;
+      for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(),
+           BBE = BBToVisit.end(); (BBI != BBE) && (!found); ++BBI) {
+        BasicBlock *BB = *BBI;
+        // Try each predecessor to see if it can be assumed.
+        for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+             (bbi != bbe) && (!found); ++bbi) {
+          Edge e = getEdge(*bbi,BB);
+          double w = getEdgeWeight(e);
+          // Check that the edge from the predecessor is still free.
+          if (w == MissingValue) {
+            // Check if there is a cycle from this block to the predecessor.
+            Path P;
+            const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+            if (Dest != *bbi) {
+              // If there is no cycle, just set the edge weight to 0.
+              EdgeInformation[&F][e] = 0;
+              DEBUG(dbgs() << "Assuming edge weight: ");
+              printEdgeWeight(e);
+              found = true;
+            }
+          }
+        }
+      }
+      if (!found) {
+        cleanup = true;
+        DEBUG(dbgs() << "No assumption possible in Function " << F.getName()
+                     << ", setting all to zero\n");
+      }
+    }
+  }
+  // In case there was no safe way to assume edges, set, as a last measure,
+  // _everything_ to zero.
+  if (cleanup) {
+    FunctionInformation[&F] = 0;
+    BlockInformation[&F].clear();
+    EdgeInformation[&F].clear();
+    for (Function::const_iterator FI = F.begin(), FE = F.end();
+         FI != FE; ++FI) {
+      const BasicBlock *BB = &(*FI);
+      BlockInformation[&F][BB] = 0;
+      const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+      if (predi == prede) {
+        Edge e = getEdge(0,BB);
+        setEdgeWeight(e,0);
+      }
+      for (;predi != prede; ++predi) {
+        Edge e = getEdge(*predi,BB);
+        setEdgeWeight(e,0);
+      }
+      succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+      if (succi == succe) {
+        Edge e = getEdge(BB,0);
+        setEdgeWeight(e,0);
+      }
+      for (;succi != succe; ++succi) {
+        Edge e = getEdge(BB,*succi);
+        setEdgeWeight(e,0);
+      }
+    }
+  }
+
+  return false;
+}
diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
new file mode 100644
index 0000000..fc7f286
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp
@@ -0,0 +1,1102 @@
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+#include <queue>
+#include <limits>
+using namespace llvm;
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
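+//
+// For orientation only (an illustrative sketch, not code from this file;
+// "MyPass" is hypothetical): a client pass consumes whichever member of
+// this analysis group is live roughly like so:
+//
+//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.setPreservesAll();
+//     AU.addRequired<ProfileInfo>();   // any group member satisfies this
+//   }
+//   bool MyPass::runOnFunction(Function &F) {
+//     ProfileInfo &PI = getAnalysis<ProfileInfo>();
+//     double Count = PI.getExecutionCount(&F.getEntryBlock());
+//     return false;                    // analysis only, nothing modified
+//   }
+//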
+static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information"); + +namespace llvm { + +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {} +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {} + +template <> +ProfileInfoT<Function, BasicBlock>::ProfileInfoT() { + MachineProfile = 0; +} +template <> +ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { + if (MachineProfile) delete MachineProfile; +} + +template<> +char ProfileInfoT<Function,BasicBlock>::ID = 0; + +template<> +char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; + +template<> +const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; + +template<> const +double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; + +template<> double +ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(BB); + if (I != J->second.end()) + return I->second; + } + + double Count = MissingValue; + + const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + + // Are there zero predecessors of this block? + if (PI == PE) { + Edge e = getEdge(0, BB); + Count = getEdgeWeight(e); + } else { + // Otherwise, if there are predecessors, the execution count of this block is + // the sum of the edge frequencies from the incoming edges. + std::set<const BasicBlock*> ProcessedPreds; + Count = 0; + for (; PI != PE; ++PI) { + const BasicBlock *P = *PI; + if (ProcessedPreds.insert(P).second) { + double w = getEdgeWeight(getEdge(P, BB)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } + } + } + + // If the predecessors did not suffice to get block weight, try successors. + if (Count == MissingValue) { + + succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + + // Are there zero successors of this block? + if (SI == SE) { + Edge e = getEdge(BB,0); + Count = getEdgeWeight(e); + } else { + std::set<const BasicBlock*> ProcessedSuccs; + Count = 0; + for (; SI != SE; ++SI) + if (ProcessedSuccs.insert(*SI).second) { + double w = getEdgeWeight(getEdge(BB, *SI)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } + } + } + + if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; + return Count; +} + +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineBasicBlock *MBB) { + std::map<const MachineFunction*, BlockCounts>::iterator J = + BlockInformation.find(MBB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(MBB); + if (I != J->second.end()) + return I->second; + } + + return MissingValue; +} + +template<> +double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) { + std::map<const Function*, double>::iterator J = + FunctionInformation.find(F); + if (J != FunctionInformation.end()) + return J->second; + + // isDeclaration() is checked here and not at start of function to allow + // functions without a body still to have a execution count. 
+ if (F->isDeclaration()) return MissingValue; + + double Count = getExecutionCount(&F->getEntryBlock()); + if (Count != MissingValue) FunctionInformation[F] = Count; + return Count; +} + +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineFunction *MF) { + std::map<const MachineFunction*, double>::iterator J = + FunctionInformation.find(MF); + if (J != FunctionInformation.end()) + return J->second; + + double Count = getExecutionCount(&MF->front()); + if (Count != MissingValue) FunctionInformation[MF] = Count; + return Count; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + setExecutionCount(const BasicBlock *BB, double w) { + DEBUG(dbgs() << "Creating Block " << BB->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[BB->getParent()][BB] = w; +} + +template<> +void ProfileInfoT<MachineFunction, MachineBasicBlock>:: + setExecutionCount(const MachineBasicBlock *MBB, double w) { + DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[MBB->getParent()][MBB] = w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) { + double oldw = getEdgeWeight(e); + assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); + DEBUG(dbgs() << "Adding to Edge " << e + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + EdgeInformation[getFunction(e)][e] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + addExecutionCount(const BasicBlock *BB, double w) { + double oldw = getExecutionCount(BB); + assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); + DEBUG(dbgs() << "Adding to Block " << BB->getName() + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + BlockInformation[BB->getParent()][BB] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J == BlockInformation.end()) return; + + DEBUG(dbgs() << "Deleting " << BB->getName() << "\n"); + J->second.erase(BB); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) { + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(getFunction(e)); + if (J == EdgeInformation.end()) return; + + DEBUG(dbgs() << "Deleting" << e << "\n"); + J->second.erase(e); +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceEdge(const Edge &oldedge, const Edge &newedge) { + double w; + if ((w = getEdgeWeight(newedge)) == MissingValue) { + w = getEdgeWeight(oldedge); + DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n"); + } else { + w += getEdgeWeight(oldedge); + DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n"); + } + setEdgeWeight(newedge,w); + removeEdge(oldedge); +} + +template<> +const BasicBlock *ProfileInfoT<Function,BasicBlock>:: + GetPath(const BasicBlock *Src, const BasicBlock *Dest, + Path &P, unsigned Mode) { + const BasicBlock *BB = 0; + bool hasFoundPath = false; + + std::queue<const BasicBlock *> BFS; + BFS.push(Src); + + while(BFS.size() && !hasFoundPath) { + BB = BFS.front(); + BFS.pop(); + + succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + if (Succ == End) { + P[0] = BB; + if (Mode & GetPathToExit) { + hasFoundPath = true; + BB = 0; + } + } + for(;Succ != End; ++Succ) { + if (P.find(*Succ) != P.end()) 
+        continue;
+      Edge e = getEdge(BB,*Succ);
+      if ((Mode & GetPathWithNewEdges) &&
+          (getEdgeWeight(e) != MissingValue)) continue;
+      P[*Succ] = BB;
+      BFS.push(*Succ);
+      if ((Mode & GetPathToDest) && *Succ == Dest) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+      if ((Mode & GetPathToValue) &&
+          (getExecutionCount(*Succ) != MissingValue)) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+    }
+  }
+
+  return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        divertFlow(const Edge &oldedge, const Edge &newedge) {
+  DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge);
+
+  // First check if the old edge was taken; if not, just delete it...
+  if (getEdgeWeight(oldedge) == 0) {
+    removeEdge(oldedge);
+    return;
+  }
+
+  Path P;
+  P[newedge.first] = 0;
+  P[newedge.second] = newedge.first;
+  const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,
+                                 GetPathToExit | GetPathToDest);
+
+  double w = getEdgeWeight(oldedge);
+  DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
+  do {
+    const BasicBlock *Parent = P.find(BB)->second;
+    Edge e = getEdge(Parent,BB);
+    double oldw = getEdgeWeight(e);
+    double oldc = getExecutionCount(e.first);
+    setEdgeWeight(e, w+oldw);
+    if (Parent != oldedge.first) {
+      setExecutionCount(e.first, w+oldc);
+    }
+    BB = Parent;
+  } while (BB != newedge.first);
+  removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces
+/// the occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+  DEBUG(dbgs() << "Replacing " << RmBB->getName()
+               << " with " << DestBB->getName() << "\n");
+  const Function *F = DestBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  Edge e, newedge;
+  bool erasededge = false;
+  EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+  while(I != E) {
+    e = (I++)->first;
+    bool foundedge = false; bool eraseedge = false;
+    if (e.first == RmBB) {
+      if (e.second == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(DestBB, e.second);
+        foundedge = true;
+      }
+    }
+    if (e.second == RmBB) {
+      if (e.first == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(e.first, DestBB);
+        foundedge = true;
+      }
+    }
+    if (foundedge) {
+      replaceEdge(e, newedge);
+    }
+    if (eraseedge) {
+      if (erasededge) {
+        Edge newedge = getEdge(DestBB, DestBB);
+        replaceEdge(e, newedge);
+      } else {
+        removeEdge(e);
+        erasededge = true;
+      }
+    }
+  }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it's possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it's necessary to redirect the flow proportionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+                                                  const BasicBlock *SecondBB,
+                                                  const BasicBlock *NewBB,
+                                                  bool MergeIdenticalEdges) {
+  const Function *F = FirstBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  // Generate edges and read current weight.
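+  // Worked example: three parallel CFG edges FirstBB->SecondBB share the
+  // single Edge key below with, say, total weight w == 30. With
+  // !MergeIdenticalEdges and an already-counted NewBB, succ_count becomes
+  // 3, so floor(30/3) == 10 is rerouted over NewBB and 20 stays on the old
+  // edge.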
+  Edge e  = getEdge(FirstBB, SecondBB);
+  Edge n1 = getEdge(FirstBB, NewBB);
+  Edge n2 = getEdge(NewBB, SecondBB);
+  EdgeWeights &ECs = J->second;
+  double w = ECs[e];
+
+  int succ_count = 0;
+  if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB; if there is more than
+    // one, only slice out a proportional part for NewBB.
+    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+        BBI != BBE; ++BBI) {
+      if (*BBI == SecondBB) succ_count++;
+    }
+    // When the NewBB is completely new, increment the count by one so that
+    // the counts are properly distributed.
+    if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+  } else {
+    // When the edges are merged anyway, then redirect all flow.
+    succ_count = 1;
+  }
+
+  // We know now how many edges there are from FirstBB to SecondBB; reroute a
+  // proportional part of the edge weight over NewBB.
+  double neww = floor(w / succ_count);
+  ECs[n1] += neww;
+  ECs[n2] += neww;
+  BlockInformation[F][NewBB] += neww;
+  if (succ_count == 1) {
+    ECs.erase(e);
+  } else {
+    ECs[e] -= neww;
+  }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+                                                   const BasicBlock* New) {
+  const Function *F = Old->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(dbgs() << "Splitting " << Old->getName() << " to "
+               << New->getName() << "\n");
+
+  std::set<Edge> Edges;
+  for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+       ewi != ewe; ++ewi) {
+    Edge old = ewi->first;
+    if (old.first == Old) {
+      Edges.insert(old);
+    }
+  }
+  for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+       EI != EE; ++EI) {
+    Edge newedge = getEdge(New, EI->second);
+    replaceEdge(*EI, newedge);
+  }
+
+  double w = getExecutionCount(Old);
+  setEdgeWeight(getEdge(Old, New), w);
+  setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+                                                   const BasicBlock* NewBB,
+                                                   BasicBlock *const *Preds,
+                                                   unsigned NumPreds) {
+  const Function *F = BB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+               << " to " << NewBB->getName() << "\n");
+
+  // Collect the weight that was redirected over NewBB.
+  double newweight = 0;
+
+  std::set<const BasicBlock *> ProcessedPreds;
+  // For all requested predecessors.
+  for (unsigned pred = 0; pred < NumPreds; ++pred) {
+    const BasicBlock * Pred = Preds[pred];
+    if (ProcessedPreds.insert(Pred).second) {
+      // Create edges and read the old weight.
+      Edge oldedge = getEdge(Pred, BB);
+      Edge newedge = getEdge(Pred, NewBB);
+
+      // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge); + + replaceEdge(oldedge,newedge); + } + } + + Edge newedge = getEdge(NewBB,BB); + setEdgeWeight(newedge, newweight); + setExecutionCount(NewBB, newweight); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old, + const Function *New) { + DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with " + << New->getName() << "\n"); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(Old); + if(J != EdgeInformation.end()) { + EdgeInformation[New] = J->second; + } + EdgeInformation.erase(Old); + BlockInformation.erase(Old); + FunctionInformation.erase(Old); +} + +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, + ProfileInfo::Edge &tocalc, unsigned &uncalc) { + if (w == ProfileInfo::MissingValue) { + tocalc = edge; + uncalc++; + return 0; + } else { + return w; + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>:: + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, + bool assumeEmptySelf) { + Edge edgetocalc; + unsigned uncalculated = 0; + + // collect weights of all incoming and outgoing edges, rememer edges that + // have no value + double incount = 0; + SmallSet<const BasicBlock*,8> pred_visited; + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi==bbe) { + Edge e = getEdge(0,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + for (;bbi != bbe; ++bbi) { + if (pred_visited.insert(*bbi)) { + Edge e = getEdge(*bbi,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + } + + double outcount = 0; + SmallSet<const BasicBlock*,8> succ_visited; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi==sbbe) { + Edge e = getEdge(BB,0); + if (getEdgeWeight(e) == MissingValue) { + double w = getExecutionCount(BB); + if (w != MissingValue) { + setEdgeWeight(e,w); + removed = e; + } + } + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + for (;sbbi != sbbe; ++sbbi) { + if (succ_visited.insert(*sbbi)) { + Edge e = getEdge(BB,*sbbi); + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + } + + // if exactly one edge weight was missing, calculate it and remove it from + // spanning tree + if (uncalculated == 0 ) { + return true; + } else + if (uncalculated == 1) { + if (incount < outcount) { + EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; + } else { + EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; + } + DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": " + << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); + removed = edgetocalc; + return true; + } else + if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { + setEdgeWeight(edgetocalc, incount * 10); + removed = edgetocalc; + return true; + } else { + return false; + } +} + +static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) { + double w = PI->getEdgeWeight(e); + if (w != ProfileInfo::MissingValue) { + calcw += w; + } else { + misscount.insert(e); + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { + double inWeight = 0; + std::set<Edge> inMissing; + std::set<const BasicBlock*> ProcessedPreds; + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi == bbe) { + 
readEdge(this,getEdge(0,BB),inWeight,inMissing); + } + for( ; bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); + } + } + + double outWeight = 0; + std::set<Edge> outMissing; + std::set<const BasicBlock*> ProcessedSuccs; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi == sbbe) + readEdge(this,getEdge(BB,0),outWeight,outMissing); + for ( ; sbbi != sbbe; ++sbbi ) { + if (ProcessedSuccs.insert(*sbbi).second) { + readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); + } + } + + double share; + std::set<Edge>::iterator ei,ee; + if (inMissing.size() == 0 && outMissing.size() > 0) { + ei = outMissing.begin(); + ee = outMissing.end(); + share = inWeight/outMissing.size(); + setExecutionCount(BB,inWeight); + } else + if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) { + ei = inMissing.begin(); + ee = inMissing.end(); + share = 0; + setExecutionCount(BB,0); + } else + if (inMissing.size() == 0 && outMissing.size() == 0) { + setExecutionCount(BB,outWeight); + return true; + } else { + return false; + } + for ( ; ei != ee; ++ei ) { + setEdgeWeight(*ei,share); + } + return true; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { +// if (getExecutionCount(&(F->getEntryBlock())) == 0) { +// for (Function::const_iterator FI = F->begin(), FE = F->end(); +// FI != FE; ++FI) { +// const BasicBlock* BB = &(*FI); +// { +// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// { +// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// } +// return; +// } + // The set of BasicBlocks that are still unvisited. + std::set<const BasicBlock*> Unvisited; + + // The set of return edges (Edges with no successors). + std::set<Edge> ReturnEdges; + double ReturnWeight = 0; + + // First iterate over the whole function and collect: + // 1) The blocks in this function in the Unvisited set. + // 2) The return edges in the ReturnEdges set. + // 3) The flow that is leaving the function already via return edges. + + // Data structure for searching the function. + std::queue<const BasicBlock *> BFS; + const BasicBlock *BB = &(F->getEntryBlock()); + BFS.push(BB); + Unvisited.insert(BB); + + while (BFS.size()) { + BB = BFS.front(); BFS.pop(); + succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + if (NBB == End) { + Edge e = getEdge(BB,0); + double w = getEdgeWeight(e); + if (w == MissingValue) { + // If the return edge has no value, try to read value from block. + double bw = getExecutionCount(BB); + if (bw != MissingValue) { + setEdgeWeight(e,bw); + ReturnWeight += bw; + } else { + // If both return edge and block provide no value, collect edge. + ReturnEdges.insert(e); + } + } else { + // If the return edge has a proper value, collect it. + ReturnWeight += w; + } + } + for (;NBB != End; ++NBB) { + if (Unvisited.insert(*NBB).second) { + BFS.push(*NBB); + } + } + } + + while (Unvisited.size() > 0) { + unsigned oldUnvisitedCount = Unvisited.size(); + bool FoundPath = false; + + // If there is only one edge left, calculate it. 
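+    // By flow conservation, everything that enters the function leaves it
+    // again, so the last unknown return edge must carry exactly the entry
+    // count minus the flow already known to leave via other return edges.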
+ if (ReturnEdges.size() == 1) { + ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight; + + Edge e = *ReturnEdges.begin(); + setEdgeWeight(e,ReturnWeight); + setExecutionCount(e.first,ReturnWeight); + + Unvisited.erase(e.first); + ReturnEdges.erase(e); + continue; + } + + // Calculate all blocks where only one edge is missing, this may also + // resolve furhter return edges. + std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + Edge e; + if(CalculateMissingEdge(BB,e,true)) { + if (BlockInformation[F].find(BB) == BlockInformation[F].end()) { + setExecutionCount(BB,getExecutionCount(BB)); + } + Unvisited.erase(BB); + if (e.first != 0 && e.second == 0) { + ReturnEdges.erase(e); + ReturnWeight += getEdgeWeight(e); + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Estimate edge weights by dividing the flow proportionally. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + bool AllEdgesHaveSameReturn = true; + // Check each Successor, these must all end up in the same or an empty + // return block otherwise its dangerous to do an estimation on them. + for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + Succ != End; ++Succ) { + Path P; + GetPath(*Succ, 0, P, GetPathToExit); + if (Dest && Dest != P[0]) { + AllEdgesHaveSameReturn = false; + } + Dest = P[0]; + } + if (AllEdgesHaveSameReturn) { + if(EstimateMissingEdges(BB)) { + Unvisited.erase(BB); + break; + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Check if there is a path to an block that has a known value and redirect + // flow accordingly. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + // Fetch path. + const BasicBlock *BB = *FI; ++FI; + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue); + + // Calculate incoming flow. + double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + invalid++; + } else { + // If the path contains the successor, this means its a backedge, + // do not count as missing. + if (P.find(*NBB) == P.end()) + inmissing++; + } + incount++; + } + } + if (inmissing == incount) continue; + if (invalid == 0) continue; + + // Subtract (already) outgoing flow. + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw -= ew; + } + } + } + if (iw < 0) continue; + + // Check the recieving end of the path if it can handle the flow. + double ow = getExecutionCount(Dest); + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + ow -= ew; + } + } + } + if (ow < 0) continue; + + // Determine how much flow shall be used. 
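+      // The flow pushed along the path is capped by the incoming surplus
+      // iw, by the receiving block's spare capacity ow and, if the edge
+      // entering Dest already has a weight, by that weight as well.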
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest)); + if (ew != MissingValue) { + ew = ew<ow?ew:ow; + ew = ew<iw?ew:iw; + } else { + if (inmissing == 0) + ew = iw; + } + + // Create flow. + if (ew != MissingValue) { + do { + Edge e = getEdge(P[Dest],Dest); + if (getEdgeWeight(e) == MissingValue) { + setEdgeWeight(e,ew); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Calculate a block with self loop. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + bool SelfEdgeFound = false; + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (*NBB == BB) { + SelfEdgeFound = true; + break; + } + } + if (SelfEdgeFound) { + Edge e = getEdge(BB,BB); + if (getEdgeWeight(e) == MissingValue) { + double iw = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + setEdgeWeight(e,iw * 10); + FoundPath = true; + } + } + } + if (FoundPath) continue; + + // Determine backedges, set them to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest; + Path P; + bool BackEdgeFound = false; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); + if (Dest == *NBB) { + BackEdgeFound = true; + break; + } + } + if (BackEdgeFound) { + Edge e = getEdge(Dest,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Channel flow to return block. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); + Dest = P[0]; + if (!Dest) continue; + + if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { + // Calculate incoming flow. + double iw = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,iw); + FoundPath = true; + } else { + assert(0 && "Edge should not have value already!"); + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Speculatively set edges to zero. 
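+    // Last-ditch heuristic before giving up: pick any block with an unknown
+    // incoming edge and assume that edge was never taken.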
+ FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Edge e = getEdge(*NBB,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + break; + } + } + } + if (FoundPath) continue; + + errs() << "{"; + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + dbgs() << BB->getName(); + if (FI != FE) + dbgs() << ","; + } + errs() << "}"; + + errs() << "ASSERT: could not repair function"; + assert(0 && "could not repair function"); + } + + EdgeWeights J = EdgeInformation[F]; + for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { + Edge e = EI->first; + + bool SuccFound = false; + if (e.first != 0) { + succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); + if (NBB == End) { + if (0 == e.second) { + SuccFound = true; + } + } + for (;NBB != End; ++NBB) { + if (*NBB == e.second) { + SuccFound = true; + break; + } + } + if (!SuccFound) { + removeEdge(e); + } + } + } +} + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { + return O << MF->getFunction()->getName() << "(MF)"; +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { + return O << MBB->getBasicBlock()->getName() << "(MB)"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +} // namespace llvm + +//===----------------------------------------------------------------------===// +// NoProfile ProfileInfo implementation +// + +namespace { + struct NoProfileInfo : public ImmutablePass, public ProfileInfo { + static char ID; // Class identification, replacement for typeinfo + NoProfileInfo() : ImmutablePass(ID) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + virtual const char *getPassName() const { + return "NoProfileInfo"; + } + }; +} // End of anonymous namespace + +char NoProfileInfo::ID = 0; +// Register this pass... 
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile", + "No Profile Information", false, true, true); + +ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp new file mode 100644 index 0000000..25481b2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp @@ -0,0 +1,158 @@ +//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileInfoLoader class is used to load and represent profiling +// information read in from the dump file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> +#include <cstdlib> +#include <map> +using namespace llvm; + +// ByteSwap - Byteswap 'Var' if 'Really' is true. +// +static inline unsigned ByteSwap(unsigned Var, bool Really) { + if (!Really) return Var; + return ((Var & (255U<< 0U)) << 24U) | + ((Var & (255U<< 8U)) << 8U) | + ((Var & (255U<<16U)) >> 8U) | + ((Var & (255U<<24U)) >> 24U); +} + +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + if (A == ProfileInfoLoader::Uncounted) return B; + if (B == ProfileInfoLoader::Uncounted) return A; + return A + B; +} + +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + std::vector<unsigned> &Data) { + // Read the number of entries... + unsigned NumEntries; + if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { + errs() << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + NumEntries = ByteSwap(NumEntries, ShouldByteSwap); + + // Read the counts... + std::vector<unsigned> TempSpace(NumEntries); + + // Read in the block of data... + if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { + errs() << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + + // Make sure we have enough space... The space is initialised to -1 to + // facitiltate the loading of missing values for OptimalEdgeProfiling. + if (Data.size() < NumEntries) + Data.resize(NumEntries, ProfileInfoLoader::Uncounted); + + // Accumulate the data we just read into the data. + if (!ShouldByteSwap) { + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(TempSpace[i], Data[i]); + } + } else { + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]); + } + } +} + +const unsigned ProfileInfoLoader::Uncounted = ~0U; + +// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the +// program if the file is invalid or broken. +// +ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, + const std::string &Filename, + Module &TheModule) : + Filename(Filename), + M(TheModule), Warned(false) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) { + errs() << ToolName << ": Error opening '" << Filename << "': "; + perror(0); + exit(1); + } + + // Keep reading packets until we run out of them. 
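+  // The llvmprof.out stream is a sequence of packets, each introduced by a
+  // 32-bit type tag. As handled below, ArgumentInfo carries a 32-bit byte
+  // length followed by the argument string (padded to a multiple of four
+  // bytes), while the counter packets carry a 32-bit entry count followed
+  // by that many 32-bit counters (see ReadProfilingBlock above).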
+ unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ByteSwap(PacketType, ShouldByteSwap); + + switch (PacketType) { + case ArgumentInfo: { + unsigned ArgLength; + if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + ArgLength = ByteSwap(ArgLength, ShouldByteSwap); + + // Read in the arguments... + std::vector<char> Chars(ArgLength+4); + + if (ArgLength) + if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength])); + break; + } + + case FunctionInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts); + break; + + case BlockInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + case OptEdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); + break; + + case BBTraceInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); + break; + + default: + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + exit(1); + } + } + + fclose(F); +} + diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp new file mode 100644 index 0000000..d325b57 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -0,0 +1,266 @@ +//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// loads the information from a profile dump file. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-loader" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" +#include <set> +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); + +static cl::opt<std::string> +ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-loader")); + +namespace { + class LoaderPass : public ModulePass, public ProfileInfo { + std::string Filename; + std::set<Edge> SpanningTree; + std::set<const BasicBlock*> BBisUnvisited; + unsigned ReadCount; + public: + static char ID; // Class identification, replacement for typeinfo + explicit LoaderPass(const std::string &filename = "") + : ModulePass(ID), Filename(filename) { + if (filename.empty()) Filename = ProfileInfoFilename; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "Profiling information loader"; + } + + // recurseBasicBlock() - Calculates the edge weights for as much basic + // blocks as possbile. + virtual void recurseBasicBlock(const BasicBlock *BB); + virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &); + virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + /// run - Load the profile information from the specified file. + virtual bool runOnModule(Module &M); + }; +} // End of anonymous namespace + +char LoaderPass::ID = 0; +INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader", + "Load profile information from llvmprof.out", false, true, false); + +char &llvm::ProfileLoaderPassID = LoaderPass::ID; + +ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } + +/// createProfileLoaderPass - This function returns a Pass that loads the +/// profiling information for the module from the specified filename, making it +/// available to the optimizers. +Pass *llvm::createProfileLoaderPass(const std::string &Filename) { + return new LoaderPass(Filename); +} + +void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc, + unsigned &uncalc, double &count) { + double w; + if ((w = getEdgeWeight(edge)) == MissingValue) { + tocalc = edge; + uncalc++; + } else { + count+=w; + } +} + +// recurseBasicBlock - Visits all neighbours of a block and then tries to +// calculate the missing edge values. 
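+// With optimal edge profiling only the edges outside a spanning tree of the
+// CFG are instrumented; the weights of the in-tree edges are implied by flow
+// conservation and are reconstructed here, one determinable edge per block
+// at a time, via CalculateMissingEdge().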
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { + + // break recursion if already visited + if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return; + BBisUnvisited.erase(BB); + if (!BB) return; + + for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + + Edge tocalc; + if (CalculateMissingEdge(BB, tocalc)) { + SpanningTree.erase(tocalc); + } +} + +void LoaderPass::readEdge(ProfileInfo::Edge e, + std::vector<unsigned> &ECs) { + if (ReadCount < ECs.size()) { + double weight = ECs[ReadCount++]; + if (weight != ProfileInfoLoader::Uncounted) { + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also representable + // in double. + EdgeInformation[getFunction(e)][e] += (double)weight; + + DEBUG(dbgs() << "--Read Edge Counter for " << e + << " (# "<< (ReadCount-1) << "): " + << (unsigned)getEdgeWeight(e) << "\n"); + } else { + // This happens only if reading optimal profiling information, not when + // reading regular profiling information. + SpanningTree.insert(e); + } + } +} + +bool LoaderPass::runOnModule(Module &M) { + ProfileInfoLoader PIL("profile-loader", Filename, M); + + EdgeInformation.clear(); + std::vector<unsigned> Counters = PIL.getRawEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + Counters = PIL.getRawOptimalEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + readEdge(getEdge(BB,0), Counters); + } + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + while (SpanningTree.size() > 0) { + + unsigned size = SpanningTree.size(); + + BBisUnvisited.clear(); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + BBisUnvisited.insert(ei->first); + BBisUnvisited.insert(ei->second); + } + while (BBisUnvisited.size() > 0) { + recurseBasicBlock(*BBisUnvisited.begin()); + } + + if (SpanningTree.size() == size) { + DEBUG(dbgs()<<"{"); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + DEBUG(dbgs()<< *ei <<","); + } + assert(0 && "No edge calculated!"); + } + + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with 
" + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + BlockInformation.clear(); + Counters = PIL.getRawBlockCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + BlockInformation[F][BB] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + FunctionInformation.clear(); + Counters = PIL.getRawFunctionCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + FunctionInformation[F] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp new file mode 100644 index 0000000..3f01b2d --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp @@ -0,0 +1,377 @@ +//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that checks profiling information for +// plausibility. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-verifier" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Debug.h" +#include <set> +using namespace llvm; + +static cl::opt<bool,false> +ProfileVerifierDisableAssertions("profile-verifier-noassert", + cl::desc("Disable assertions")); + +namespace llvm { + template<class FType, class BType> + class ProfileVerifierPassT : public FunctionPass { + + struct DetailedBlockInfo { + const BType *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; + }; + + ProfileInfoT<FType, BType> *PI; + std::set<const BType*> BBisVisited; + std::set<const FType*> FisVisited; + bool DisableAssertions; + + // When debugging is enabled, the verifier prints a whole slew of debug + // information, otherwise its just the assert. These are all the helper + // functions. 
+    bool PrintedDebugTree;
+    std::set<const BType*> BBisPrinted;
+    void debugEntry(DetailedBlockInfo*);
+    void printDebugInfo(const BType *BB);
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+
+    explicit ProfileVerifierPassT () : FunctionPass(ID) {
+      DisableAssertions = ProfileVerifierDisableAssertions;
+    }
+    explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
+                                              DisableAssertions(da) {
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<ProfileInfoT<FType, BType> >();
+    }
+
+    const char *getPassName() const {
+      return "Profiling information verifier";
+    }
+
+    /// run - Verify the profile information.
+    bool runOnFunction(FType &F);
+    void recurseBasicBlock(const BType*);
+
+    bool   exitReachable(const FType*);
+    double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
+    void   CheckValue(bool, const char*, DetailedBlockInfo*);
+  };
+
+  typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+    if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+    double BBWeight = PI->getExecutionCount(BB);
+    if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+    double inWeight = 0;
+    int inCount = 0;
+    std::set<const BType*> ProcessedPreds;
+    for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+         bbi != bbe; ++bbi ) {
+      if (ProcessedPreds.insert(*bbi).second) {
+        typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+        double EdgeWeight = PI->getEdgeWeight(E);
+        if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+        dbgs() << "calculated in-edge " << E << ": "
+               << format("%20.20g",EdgeWeight) << "\n";
+        inWeight += EdgeWeight;
+        inCount++;
+      }
+    }
+    double outWeight = 0;
+    int outCount = 0;
+    std::set<const BType*> ProcessedSuccs;
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+          bbi != bbe; ++bbi ) {
+      if (ProcessedSuccs.insert(*bbi).second) {
+        typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+        double EdgeWeight = PI->getEdgeWeight(E);
+        if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+        dbgs() << "calculated out-edge " << E << ": "
+               << format("%20.20g",EdgeWeight) << "\n";
+        outWeight += EdgeWeight;
+        outCount++;
+      }
+    }
+    dbgs() << "Block " << BB->getNameStr()        << " in "
+           << BB->getParent()->getNameStr()       << ":"
+           << "BBWeight="  << format("%20.20g",BBWeight)  << ","
+           << "inWeight="  << format("%20.20g",inWeight)  << ","
+           << "inCount="   << inCount                     << ","
+           << "outWeight=" << format("%20.20g",outWeight) << ","
+           << "outCount="  << outCount                    << "\n";
+
+    // mark as visited and recurse into subnodes
+    BBisPrinted.insert(BB);
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+          bbi != bbe; ++bbi ) {
+      printDebugInfo(*bbi);
+    }
+  }
+
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+    dbgs() << "TROUBLE: Block " << DI->BB->getNameStr()  << " in "
+           << DI->BB->getParent()->getNameStr()          << ":"
+           << "BBWeight="  << format("%20.20g",DI->BBWeight)  << ","
+           << "inWeight="  << format("%20.20g",DI->inWeight)  << ","
+           << "inCount="   << DI->inCount                     << ","
+           << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+           << "outCount="  << DI->outCount                    << "\n";
+    if (!PrintedDebugTree) {
+      PrintedDebugTree = true;
+      printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+    }
} + + // This compares A and B for equality. + static bool Equals(double A, double B) { + return A == B; + } + + // This checks if the function "exit" is reachable from an given function + // via calls, this is necessary to check if a profile is valid despite the + // counts not fitting exactly. + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) { + if (!F) return false; + + if (FisVisited.count(F)) return false; + + FType *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } + + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { + FType *F = CI->getCalledFunction(); + if (F) { + exits |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + exits = true; + } + if (exits) break; + } + } + return exits; + } + + #define ASSERTMESSAGE(M) \ + { dbgs() << "ASSERT:" << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); } + + template<class FType, class BType> + double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has missing value"); + return 0; + } else { + if (EdgeWeight < 0) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has negative value"); + } + return EdgeWeight; + } + } + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, + const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + dbgs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; + } + + // This calculates the Information for a block and then recurses into the + // successors. + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = 0; + DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set<const BType*> ProcessedPreds; + const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); + DI.inCount++; + } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } + + // Read successors. + std::set<const BType*> ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). 
+    double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+    if (w != ProfileInfoT<FType, BType>::MissingValue) {
+      DI.outWeight += w;
+      DI.outCount++;
+    }
+    for (;bbi != bbe; ++bbi) {
+      if (ProcessedSuccs.insert(*bbi).second) {
+        DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+        DI.outCount++;
+      }
+    }
+
+    // Read block weight.
+    DI.BBWeight = PI->getExecutionCount(BB);
+    CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+               "BasicBlock has missing value", &DI);
+    CheckValue(DI.BBWeight < 0,
+               "BasicBlock has negative value", &DI);
+
+    // Check if this block is a setjmp target.
+    bool isSetJmpTarget = false;
+    if (DI.outWeight > DI.inWeight) {
+      for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+           i != ie; ++i) {
+        if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+          FType *F = CI->getCalledFunction();
+          if (F && (F->getNameStr() == "_setjmp")) {
+            isSetJmpTarget = true; break;
+          }
+        }
+      }
+    }
+    // Check if this block is eventually reaching exit.
+    bool isExitReachable = false;
+    if (DI.inWeight > DI.outWeight) {
+      for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+           i != ie; ++i) {
+        if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+          FType *F = CI->getCalledFunction();
+          if (F) {
+            FisVisited.clear();
+            isExitReachable |= exitReachable(F);
+          } else {
+            // This is a call to a pointer, all bets are off...
+            isExitReachable = true;
+          }
+          if (isExitReachable) break;
+        }
+      }
+    }
+
+    if (DI.inCount > 0 && DI.outCount == 0) {
+      // If this is a block with no successors.
+      if (!isSetJmpTarget) {
+        CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+                   "inWeight and BBWeight do not match", &DI);
+      }
+    } else if (DI.inCount == 0 && DI.outCount > 0) {
+      // If this is a block with no predecessors.
+      if (!isExitReachable)
+        CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+                   "BBWeight and outWeight do not match", &DI);
+    } else {
+      // If this block has successors and predecessors.
+      if (DI.inWeight > DI.outWeight && !isExitReachable)
+        CheckValue(!Equals(DI.inWeight,DI.outWeight),
+                   "inWeight and outWeight do not match", &DI);
+      if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+        CheckValue(!Equals(DI.inWeight,DI.outWeight),
+                   "inWeight and outWeight do not match", &DI);
+    }
+
+    // Mark this block as visited, recurse into successors.
+    BBisVisited.insert(BB);
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+          bbi != bbe; ++bbi ) {
+      recurseBasicBlock(*bbi);
+    }
+  }
+
+  template<class FType, class BType>
+  bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+    PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+    if (!PI)
+      ASSERTMESSAGE("No ProfileInfo available");
+
+    // Prepare global variables.
+    PrintedDebugTree = false;
+    BBisVisited.clear();
+
+    // Fetch entry block and recurse into it.
+    const BType *entry = &F.getEntryBlock();
+    recurseBasicBlock(entry);
+
+    if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+      ASSERTMESSAGE("Function count and entry block count do not match");
+
+    return false;
+  }
+
+  template<class FType, class BType>
+  char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
+
+INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
+                "Verify profiling information", false, true);
+
+namespace llvm {
+  FunctionPass *createProfileVerifierPass() {
+    return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+  }
+}
+
diff --git a/contrib/llvm/lib/Analysis/README.txt b/contrib/llvm/lib/Analysis/README.txt
new file mode 100644
index 0000000..0e96e4c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/README.txt
@@ -0,0 +1,30 @@
+Analysis Opportunities:
+
+//===---------------------------------------------------------------------===//
+
+In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the
+ScalarEvolution expression for %r is this:
+
+  {1,+,3,+,2}<loop>
+
+Outside the loop, this could be evaluated simply as (%n * %n); however,
+ScalarEvolution currently evaluates it as
+
+  (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n))
+
+In addition to being much more complicated, it involves i65 arithmetic,
+which is very inefficient when expanded into code.
+
+//===---------------------------------------------------------------------===//
+
+In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, ScalarEvolution
+is forming this expression:
+
+((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
+
+This could be folded to
+
+(-1 * (trunc i64 undef to i32))
+
+//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 0000000..abc057a
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,749 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+                  cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions,       "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+//===----------------------------------------------------------------------===//
+/// PrintStyle - Print region in different ways.
+enum PrintStyle { PrintNone, PrintBB, PrintRN }; + +cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden, + cl::desc("style of printing regions"), + cl::values( + clEnumValN(PrintNone, "none", "print no details"), + clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"), + clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"), + clEnumValEnd)); +//===----------------------------------------------------------------------===// +/// Region Implementation +Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, + DominatorTree *dt, Region *Parent) + : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +Region::~Region() { + // Free the cached nodes. + for (BBNodeMapT::iterator it = BBNodeMap.begin(), + ie = BBNodeMap.end(); it != ie; ++it) + delete it->second; + + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. + BBNodeMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +bool Region::contains(const BasicBlock *B) const { + BasicBlock *BB = const_cast<BasicBlock*>(B); + + assert(DT->getNode(BB) && "BB not part of the dominance tree"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) + && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +bool Region::contains(const Loop *L) const { + // BBs that are not part of any loop are element of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. + if (L == 0) + return getExit() == 0; + + if (!contains(L->getHeader())) + return false; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(), + BE = ExitingBlocks.end(); BI != BE; ++BI) + if (!contains(*BI)) + return false; + + return true; +} + +Loop *Region::outermostLoopInRegion(Loop *L) const { + if (!contains(L)) + return 0; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + Loop *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +bool Region::isSimple() const { + bool isSimple = true; + bool found = false; + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // TopLevelRegion + if (!exit) + return false; + + for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; + ++PI) { + BasicBlock *Pred = *PI; + if (DT->getNode(Pred) && !contains(Pred)) { + if (found) { + isSimple = false; + break; + } + found = true; + } + } + + found = false; + + for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; + ++PI) + if (contains(*PI)) { + if (found) { + isSimple = false; + break; + } + found = true; + } + + return isSimple; +} + +std::string Region::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + WriteAsOperand(OS, getEntry(), false); + entryName = OS.str(); + } else + entryName = getEntry()->getNameStr(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + WriteAsOperand(OS, getExit(), false); + exitName = OS.str(); + } else + exitName = 
getExit()->getNameStr(); + } else + exitName = "<Function Return>"; + + return entryName + " => " + exitName; +} + +void Region::verifyBBInRegion(BasicBlock *BB) const { + if (!contains(BB)) + llvm_unreachable("Broken region found!"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (!contains(*SI) && exit != *SI) + llvm_unreachable("Broken region found!"); + + if (entry != BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) + if (!contains(*SI)) + llvm_unreachable("Broken region found!"); +} + +void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const { + BasicBlock *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (*SI != exit && visited->find(*SI) == visited->end()) + verifyWalk(*SI, visited); +} + +void Region::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive + // check will be invoked by PassManager. + if (!VerifyRegionInfo) return; + + std::set<BasicBlock*> visited; + verifyWalk(getEntry(), &visited); +} + +void Region::verifyRegionNest() const { + for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->verifyRegionNest(); + + verifyRegion(); +} + +Region::block_iterator Region::block_begin() { + return GraphTraits<FlatIt<Region*> >::nodes_begin(this); +} + +Region::block_iterator Region::block_end() { + return GraphTraits<FlatIt<Region*> >::nodes_end(this); +} + +Region::const_block_iterator Region::block_begin() const { + return GraphTraits<FlatIt<const Region*> >::nodes_begin(this); +} + +Region::const_block_iterator Region::block_end() const { + return GraphTraits<FlatIt<const Region*> >::nodes_end(this); +} + +Region::element_iterator Region::element_begin() { + return GraphTraits<Region*>::nodes_begin(this); +} + +Region::element_iterator Region::element_end() { + return GraphTraits<Region*>::nodes_end(this); +} + +Region::const_element_iterator Region::element_begin() const { + return GraphTraits<const Region*>::nodes_begin(this); +} + +Region::const_element_iterator Region::element_end() const { + return GraphTraits<const Region*>::nodes_end(this); +} + +Region* Region::getSubRegionNode(BasicBlock *BB) const { + Region *R = RI->getRegionFor(BB); + + if (!R || R == this) + return 0; + + // If we pass the BB out of this region, that means our code is broken. 
+  assert(contains(R) && "BB not in current region!");
+
+  while (contains(R->getParent()) && R->getParent() != this)
+    R = R->getParent();
+
+  if (R->getEntry() != BB)
+    return 0;
+
+  return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+  assert(contains(BB) && "Can't get BB node out of this region!");
+
+  BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+  if (at != BBNodeMap.end())
+    return at->second;
+
+  RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+  BBNodeMap.insert(std::make_pair(BB, NewNode));
+  return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+  assert(contains(BB) && "Can't get BB node out of this region!");
+  if (Region* Child = getSubRegionNode(BB))
+    return Child->getNode();
+
+  return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    (*I)->parent = To;
+    To->children.push_back(*I);
+  }
+  children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion) {
+  assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+  SubRegion->parent = this;
+  // Set up the region node.
+  assert(std::find(children.begin(), children.end(), SubRegion) == children.end()
+         && "Node already exists!");
+  children.push_back(SubRegion);
+}
+
+Region *Region::removeSubRegion(Region *Child) {
+  assert(Child->parent == this && "Child is not a child of this region!");
+  Child->parent = 0;
+  RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+  assert(I != children.end() && "Region does not exist. Unable to remove.");
+  children.erase(children.begin()+(I-begin()));
+  return Child;
+}
+
+unsigned Region::getDepth() const {
+  unsigned Depth = 0;
+
+  for (Region *R = parent; R != 0; R = R->parent)
+    ++Depth;
+
+  return Depth;
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
+  if (print_tree)
+    OS.indent(level*2) << "[" << level << "] " << getNameStr();
+  else
+    OS.indent(level*2) << getNameStr();
+
+  OS << "\n";
+
+  if (printStyle != PrintNone) {
+    OS.indent(level*2) << "{\n";
+    OS.indent(level*2 + 2);
+
+    if (printStyle == PrintBB) {
+      for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+        OS << **I << ", "; // TODO: remove the last ","
+    } else if (printStyle == PrintRN) {
+      for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+        OS << **I << ", "; // TODO: remove the last ","
+    }
+
+    OS << "\n";
+  }
+
+  if (print_tree)
+    for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+      (*RI)->print(OS, print_tree, level+1);
+
+  if (printStyle != PrintNone)
+    OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+  print(dbgs(), true, getDepth());
+}
+
+void Region::clearNodeCache() {
+  BBNodeMap.clear();
+  for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+    (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+                                     BasicBlock *exit) const {
+  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+    BasicBlock *P = *PI;
+    if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+      return false;
+  }
+  return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+  assert(entry && exit && "entry and exit must not be null!");
+  typedef DominanceFrontier::DomSetType DST;
+
+  DST *entrySuccs = &DF->find(entry)->second;
+
+  // Exit 
is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) + if (*SI != exit && *SI != entry) + return false; + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI == exit || *SI == entry) + continue; + if (exitSuccs->find(*SI) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(*SI, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. + for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); + SI != SE; ++SI) + if (DT->properlyDominates(entry, *SI) && *SI != exit) + return false; + + + return true; +} + +void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BasicBlock *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, + BBtoBBMap *ShortCut) const { + BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = succ_end(entry) - succ_begin(entry); + + if (num_successors <= 1 && exit == *(succ_begin(entry))) + return true; + + return false; +} + +void RegionInfo::updateStatistics(Region *R) { + ++numRegions; + + // TODO: Slow. Should only be enabled if -stats is used. + if (R->isSimple()) ++numSimpleRegions; +} + +Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return 0; + + Region *region = new Region(entry, exit, this, DT); + BBtoRegion.insert(std::make_pair(entry, region)); + + #ifdef XDEBUG + region->verifyRegion(); + #else + DEBUG(region->verifyRegion()); + #endif + + updateStatistics(region); + return region; +} + +void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNode *N = PDT->getNode(entry); + + if (!N) + return; + + Region *lastRegion= 0; + BasicBlock *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BasicBlock *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + Region *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. + if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. 
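+  //
+  // Illustrative sketch (editor's note, not from the original source): on a
+  // linear CFG A -> B -> C where (A,C) is the largest region found with
+  // entry A, this records ShortCut[A] = C.  A later post-dominance walk that
+  // reaches A through getNextPostDom() then continues directly at C's
+  // immediate postdominator instead of revisiting B.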
+  if (lastExit != entry)
+    insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+  BasicBlock *entry = &(F.getEntryBlock());
+  DomTreeNode *N = DT->getNode(entry);
+
+  // Iterate over the dominance tree in post order to start with the small
+  // regions from the bottom of the dominance tree. If the small regions are
+  // detected first, detection of bigger regions is faster, as we can jump
+  // over the small regions.
+  for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+       ++FI) {
+    findRegionsWithEntry(FI->getBlock(), ShortCut);
+  }
+}
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+  while (region->parent)
+    region = region->getParent();
+
+  return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+  BasicBlock *BB = N->getBlock();
+
+  // Passed region exit
+  while (BB == region->getExit())
+    region = region->getParent();
+
+  BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+  // This basic block is a start block of a region. It is already in the
+  // BBtoRegion relation. Only the child basic blocks have to be updated.
+  if (it != BBtoRegion.end()) {
+    Region *newRegion = it->second;
+    region->addSubRegion(getTopMostParent(newRegion));
+    region = newRegion;
+  } else {
+    BBtoRegion[BB] = region;
+  }
+
+  for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+    buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+  BBtoRegion.clear();
+  if (TopLevelRegion)
+    delete TopLevelRegion;
+  TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+  TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+  releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+  // ShortCut maps, for every BB, the exit of the largest region starting at
+  // BB. These regions can be treated as single BBs. This improves performance
+  // on linear CFGs.
+  BBtoBBMap ShortCut;
+
+  scanForRegions(F, &ShortCut);
+  BasicBlock *BB = &F.getEntryBlock();
+  buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+  releaseMemory();
+
+  DT = &getAnalysis<DominatorTree>();
+  PDT = &getAnalysis<PostDominatorTree>();
+  DF = &getAnalysis<DominanceFrontier>();
+
+  TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+  updateStatistics(TopLevelRegion);
+
+  Calculate(F);
+
+  return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequired<PostDominatorTree>();
+  AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+  OS << "Region tree:\n";
+  TopLevelRegion->print(OS, true, 0);
+  OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+  // Only do verification when the user wants to; otherwise this expensive
+  // check will be invoked by PMDataManager::verifyPreservedAnalysis whenever
+  // a region pass (marked PreservedAll) finishes.
+  if (!VerifyRegionInfo) return;
+
+  TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+  BBtoRegionMap::const_iterator I =
+    BBtoRegion.find(BB);
+  return I != BBtoRegion.end() ? I->second : 0;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+  return getRegionFor(BB);
+}
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+  BasicBlock *Exit = NULL;
+
+  while (true) {
+    // Get largest region that starts at BB.
+    Region *R = getRegionFor(BB);
+    while (R && R->getParent() && R->getParent()->getEntry() == BB)
+      R = R->getParent();
+
+    // Get the single exit of BB.
+    if (R && R->getEntry() == BB)
+      Exit = R->getExit();
+    else if (++succ_begin(BB) == succ_end(BB))
+      Exit = *succ_begin(BB);
+    else // No single exit exists.
+      return Exit;
+
+    // Get largest region that starts at Exit.
+    Region *ExitR = getRegionFor(Exit);
+    while (ExitR && ExitR->getParent()
+           && ExitR->getParent()->getEntry() == Exit)
+      ExitR = ExitR->getParent();
+
+    for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+         ++PI)
+      if (!R->contains(*PI) && !ExitR->contains(*PI))
+        break;
+
+    // This stops infinite cycles.
+    if (DT->dominates(Exit, BB))
+      break;
+
+    BB = Exit;
+  }
+
+  return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+  assert(A && B && "One of the Regions is NULL");
+
+  if (A->contains(B)) return A;
+
+  while (!B->contains(A))
+    B = B->getParent();
+
+  return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+  Region* ret = Regions.back();
+  Regions.pop_back();
+
+  for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+       E = Regions.end(); I != E; ++I)
+    ret = getCommonRegion(ret, *I);
+
+  return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+  Region* ret = getRegionFor(BBs.back());
+  BBs.pop_back();
+
+  for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+       E = BBs.end(); I != E; ++I)
+    ret = getCommonRegion(ret, getRegionFor(*I));
+
+  return ret;
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS(RegionInfo, "regions",
+                "Detect single entry single exit regions", true, true);
+
+// Create methods available outside of this file, so that they can be used in
+// include/llvm/LinkAllPasses.h. Otherwise the passes would be removed by
+// link-time optimization.
+
+namespace llvm {
+  FunctionPass *createRegionInfoPass() {
+    return new RegionInfo();
+  }
+}
+
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 0000000..fee5c1b
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,186 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
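+//
+// Usage sketch (editor's example; assumes an opt binary with these passes
+// linked in and a function named main in foo.bc; the output file name
+// follows the DOT printer's "<graph name>.<function>.dot" pattern):
+//
+//   opt -dot-regions foo.bc -o /dev/null   # expected to write reg.main.dot
+//   dot -Tps reg.main.dot -o reg.main.ps   # render with graphviz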
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/RegionPrinter.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// onlySimpleRegion - Show only the simple regions in the RegionViewer. +static cl::opt<bool> +onlySimpleRegions("only-simple-regions", + cl::desc("Show only simple regions in the graphviz viewer"), + cl::Hidden, + cl::init(false)); + +namespace llvm { +template<> +struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) { + + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs<BasicBlock>(); + + if (isSimple()) + return DOTGraphTraits<const Function*> + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits<const Function*> + ::getCompleteNodeLabel(BB, BB->getParent()); + } + + return "Not implemented"; + } +}; + +template<> +struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<RegionNode*>(isSimple) {} + + static std::string getGraphName(RegionInfo *DT) { + return "Region Graph"; + } + + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, + G->getTopLevelRegion()); + } + + // Print the cluster of the subregions. This groups the single basic blocks + // and adds a different background color for each group. 
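+  //
+  // For a region one level deep, the emitted text looks roughly like this
+  // (editor's sketch; the node names are pointer values and the color index
+  // depends on the nesting depth):
+  //
+  //   subgraph cluster_0x8f0d2e0 {
+  //     label = "";
+  //     style = filled;
+  //     color = 3
+  //     Node0x8f0e4a8;
+  //   }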
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + unsigned depth = 0) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + if (!onlySimpleRegions || R->isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 2) << "\n"; + } + + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + printRegionCluster(*RI, GW, depth + 1); + + RegionInfo *RI = R->getRegionInfo(); + + for (Region::const_block_iterator BI = R->block_begin(), + BE = R->block_end(); BI != BE; ++BI) { + BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>(); + if (RI->getRegionFor(BB) == R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + } + + O.indent(2 * depth) << "}\n"; + } + + static void addCustomGraphFeatures(const RegionInfo* RI, + GraphWriter<RegionInfo*> &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(RI->getTopLevelRegion(), GW, 4); + } +}; +} //end namespace llvm + +namespace { + +struct RegionViewer + : public DOTGraphTraitsViewer<RegionInfo, false> { + static char ID; + RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){} +}; + +char RegionViewer::ID = 0; +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true); + +struct RegionOnlyViewer + : public DOTGraphTraitsViewer<RegionInfo, true> { + static char ID; + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){} +}; + +char RegionOnlyViewer::ID = 0; +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true); + +struct RegionPrinter + : public DOTGraphTraitsPrinter<RegionInfo, false> { + static char ID; + RegionPrinter() : + DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {} +}; +} //end anonymous namespace + +char RegionPrinter::ID = 0; +INITIALIZE_PASS(RegionPrinter, "dot-regions", + "Print regions of function to 'dot' file", true, true); + +namespace { + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter<RegionInfo, true> { + static char ID; + RegionOnlyPrinter() : + DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {} +}; + +} + +char RegionOnlyPrinter::ID = 0; +INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file " + "(with no function bodies)", + true, true); + +FunctionPass* llvm::createRegionViewerPass() { + return new RegionViewer(); +} + +FunctionPass* llvm::createRegionOnlyViewerPass() { + return new RegionOnlyViewer(); +} + +FunctionPass* llvm::createRegionPrinterPass() { + return new RegionPrinter(); +} + +FunctionPass* llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); +} + diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp new file mode 100644 index 0000000..b892d85 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -0,0 +1,5955 @@ +//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution analysis +// engine, which is used primarily to analyze expressions involving induction +// variables in loops. +// +// There are several aspects to this library. First is the representation of +// scalar expressions, which are represented as subclasses of the SCEV class. +// These classes are used to represent certain types of subexpressions that we +// can handle. We only create one SCEV of a particular shape, so +// pointer-comparisons for equality are legal. +// +// One important aspect of the SCEV objects is that they are never cyclic, even +// if there is a cycle in the dataflow for an expression (ie, a PHI node). If +// the PHI node is one of the idioms that we can represent (e.g., a polynomial +// recurrence) then we represent it directly as a recurrence node, otherwise we +// represent it as a SCEVUnknown node. +// +// In addition to being able to represent expressions of various types, we also +// have folders that are used to build the *canonical* representation for a +// particular expression. These folders are capable of using a variety of +// rewrite rules to simplify the expressions. +// +// Once the folders are defined, we can implement the more interesting +// higher-level code, such as the code that recognizes PHI nodes of various +// types, computes the execution count of a loop, etc. +// +// TODO: We should use these routines and value representations to implement +// dependence analysis! +// +//===----------------------------------------------------------------------===// +// +// There are several good references for the techniques used in this analysis. +// +// Chains of recurrences -- a method to expedite the evaluation +// of closed-form functions +// Olaf Bachmann, Paul S. Wang, Eugene V. Zima +// +// On computational properties of chains of recurrences +// Eugene V. Zima +// +// Symbolic Evaluation of Chains of Recurrences for Loop Optimization +// Robert A. van Engelen +// +// Efficient Symbolic Analysis for Optimizing Compilers +// Robert A. 
van Engelen +// +// Using the chains of recurrences algebra for data dependence testing and +// induction variable substitution +// MS Thesis, Johnie Birch +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalar-evolution" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumArrayLenItCounts, + "Number of trip counts computed with array length"); +STATISTIC(NumTripCountsComputed, + "Number of loops with predictable loop counts"); +STATISTIC(NumTripCountsNotComputed, + "Number of loops without predictable loop counts"); +STATISTIC(NumBruteForceTripCountsComputed, + "Number of loops with trip counts computed by force"); + +static cl::opt<unsigned> +MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant " + "derived loop"), + cl::init(100)); + +INITIALIZE_PASS(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true); +char ScalarEvolution::ID = 0; + +//===----------------------------------------------------------------------===// +// SCEV class definitions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Implementation of the SCEV class. 
+// + +SCEV::~SCEV() {} + +void SCEV::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +bool SCEV::isZero() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isZero(); + return false; +} + +bool SCEV::isOne() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isOne(); + return false; +} + +bool SCEV::isAllOnesValue() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isAllOnesValue(); + return false; +} + +SCEVCouldNotCompute::SCEVCouldNotCompute() : + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} + +bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; +} + +const Type *SCEVCouldNotCompute::getType() const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return 0; +} + +bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; +} + +bool SCEVCouldNotCompute::hasOperand(const SCEV *) const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; +} + +void SCEVCouldNotCompute::print(raw_ostream &OS) const { + OS << "***COULDNOTCOMPUTE***"; +} + +bool SCEVCouldNotCompute::classof(const SCEV *S) { + return S->getSCEVType() == scCouldNotCompute; +} + +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { + FoldingSetNodeID ID; + ID.AddInteger(scConstant); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); +} + +const SCEV * +ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { + const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); +} + +const Type *SCEVConstant::getType() const { return V->getType(); } + +void SCEVConstant::print(raw_ostream &OS) const { + WriteAsOperand(OS, V, false); +} + +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, const Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} + +bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return Op->dominates(BB, DT); +} + +bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return Op->properlyDominates(BB, DT); +} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate non-integer value!"); +} + +void SCEVTruncateExpr::print(raw_ostream &OS) const { + OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; +} + +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot zero extend non-integer value!"); +} + +void SCEVZeroExtendExpr::print(raw_ostream &OS) const { + OS << "(zext " << *Op->getType() << " 
" << *Op << " to " << *Ty << ")"; +} + +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot sign extend non-integer value!"); +} + +void SCEVSignExtendExpr::print(raw_ostream &OS) const { + OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; +} + +void SCEVCommutativeExpr::print(raw_ostream &OS) const { + const char *OpStr = getOperationStr(); + OS << "("; + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { + OS << **I; + if (llvm::next(I) != E) + OS << OpStr; + } + OS << ")"; +} + +bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + if (!(*I)->dominates(BB, DT)) + return false; + return true; +} + +bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + if (!(*I)->properlyDominates(BB, DT)) + return false; + return true; +} + +bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const { + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + if (!(*I)->isLoopInvariant(L)) + return false; + return true; +} + +// hasComputableLoopEvolution - N-ary expressions have computable loop +// evolutions iff they have at least one operand that varies with the loop, +// but that all varying operands are computable. +bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const { + bool HasVarying = false; + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { + const SCEV *S = *I; + if (!S->isLoopInvariant(L)) { + if (S->hasComputableLoopEvolution(L)) + HasVarying = true; + else + return false; + } + } + return HasVarying; +} + +bool SCEVNAryExpr::hasOperand(const SCEV *O) const { + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) { + const SCEV *S = *I; + if (O == S || S->hasOperand(O)) + return true; + } + return false; +} + +bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); +} + +bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT); +} + +void SCEVUDivExpr::print(raw_ostream &OS) const { + OS << "(" << *LHS << " /u " << *RHS << ")"; +} + +const Type *SCEVUDivExpr::getType() const { + // In most cases the types of LHS and RHS will be the same, but in some + // crazy cases one or the other may be a pointer. ScalarEvolution doesn't + // depend on the type for correctness, but handling types carefully can + // avoid extra casts in the SCEVExpander. The LHS is more likely to be + // a pointer type than the RHS, so use the RHS' type here. + return RHS->getType(); +} + +bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { + // Add recurrences are never invariant in the function-body (null loop). + if (!QueryLoop) + return false; + + // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L. + if (QueryLoop->contains(L)) + return false; + + // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop. + if (L->contains(QueryLoop)) + return true; + + // This recurrence is variant w.r.t. QueryLoop if any of its operands + // are variant. 
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + if (!(*I)->isLoopInvariant(QueryLoop)) + return false; + + // Otherwise it's loop-invariant. + return true; +} + +bool +SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::dominates(BB, DT); +} + +bool +SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + // This uses a "dominates" query instead of "properly dominates" query because + // the instruction which produces the addrec's value is a PHI, and a PHI + // effectively properly dominates its entire containing block. + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::properlyDominates(BB, DT); +} + +void SCEVAddRecExpr::print(raw_ostream &OS) const { + OS << "{" << *Operands[0]; + for (unsigned i = 1, e = NumOperands; i != e; ++i) + OS << ",+," << *Operands[i]; + OS << "}<"; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ">"; +} + +void SCEVUnknown::deleted() { + // Clear this SCEVUnknown from ValuesAtScopes. + SE->ValuesAtScopes.erase(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Release the value. + setValPtr(0); +} + +void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from ValuesAtScopes. + SE->ValuesAtScopes.erase(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Update this SCEVUnknown to point to the new value. This is needed + // because there may still be outstanding SCEVs which still point to + // this SCEVUnknown. + setValPtr(New); +} + +bool SCEVUnknown::isLoopInvariant(const Loop *L) const { + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". 
+ if (Instruction *I = dyn_cast<Instruction>(getValue())) + return L && !L->contains(I); + return true; +} + +bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { + if (Instruction *I = dyn_cast<Instruction>(getValue())) + return DT->dominates(I->getParent(), BB); + return true; +} + +bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + if (Instruction *I = dyn_cast<Instruction>(getValue())) + return DT->properlyDominates(I->getParent(), BB); + return true; +} + +const Type *SCEVUnknown::getType() const { + return getValue()->getType(); +} + +bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue() && + CE->getNumOperands() == 2) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) + if (CI->isOne()) { + AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) + ->getElementType(); + return true; + } + + return false; +} + +bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked() && + CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + AllocTy = STy->getElementType(1); + return true; + } + } + } + + return false; +} + +bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getNumOperands() == 3 && + CE->getOperand(0)->isNullValue() && + CE->getOperand(1)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + // Ignore vector types here so that ScalarEvolutionExpander doesn't + // emit getelementptrs that index into vectors. + if (Ty->isStructTy() || Ty->isArrayTy()) { + CTy = Ty; + FieldNo = CE->getOperand(2); + return true; + } + } + + return false; +} + +void SCEVUnknown::print(raw_ostream &OS) const { + const Type *AllocTy; + if (isSizeOf(AllocTy)) { + OS << "sizeof(" << *AllocTy << ")"; + return; + } + if (isAlignOf(AllocTy)) { + OS << "alignof(" << *AllocTy << ")"; + return; + } + + const Type *CTy; + Constant *FieldNo; + if (isOffsetOf(CTy, FieldNo)) { + OS << "offsetof(" << *CTy << ", "; + WriteAsOperand(OS, FieldNo, false); + OS << ")"; + return; + } + + // Otherwise just print it normally. 
+ WriteAsOperand(OS, getValue(), false); +} + +//===----------------------------------------------------------------------===// +// SCEV Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// SCEVComplexityCompare - Return true if the complexity of the LHS is less + /// than the complexity of the RHS. This comparator is used to canonicalize + /// expressions. + class SCEVComplexityCompare { + const LoopInfo *const LI; + public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} + + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } + + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (LType) { + case scUnknown: { + const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); + const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast<Argument>(LV)) { + const Argument *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; + } + + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { + const Instruction *RInst = cast<Instruction>(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } + + return 0; + } + + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? 
-1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; + } + + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); + const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); + const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); + const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); + } + + default: + break; + } + + llvm_unreachable("Unknown SCEV kind!"); + return 0; + } + }; +} + +/// GroupByComplexity - Given a list of SCEV objects, order them by their +/// complexity, and group objects of the same complexity together by value. +/// When this routine is finished, we know that any duplicates in the vector are +/// consecutive and that complexity is monotonically increasing. +/// +/// Note that we take special precautions to ensure that we get deterministic +/// results from this routine. In other words, we don't want the results of +/// this to depend on where the addresses of various SCEV objects happened to +/// land in memory. +/// +static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, + LoopInfo *LI) { + if (Ops.size() < 2) return; // Noop + if (Ops.size() == 2) { + // This is the common case, which also happens to be trivially simple. + // Special case it. + const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; + if (SCEVComplexityCompare(LI)(RHS, LHS)) + std::swap(LHS, RHS); + return; + } + + // Do the rough sort by complexity. + std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + + // Now that we are sorted by complexity, group elements of the same + // complexity. Note that this is, at worst, N^2, but the vector is likely to + // be extremely short in practice. Note that we take this approach because we + // do not want to depend on the addresses of the objects we are grouping. + for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { + const SCEV *S = Ops[i]; + unsigned Complexity = S->getSCEVType(); + + // If there are any objects of the same complexity and same value as this + // one, group them.
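+ // For example, if the sort left Ops as {X, Y, X}, where X and Y compare + // as equally complex, the swap below produces {X, X, Y}, so callers can + // fold duplicates by scanning adjacent elements only.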
+ for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { + if (Ops[j] == S) { // Found a duplicate. + // Move it to immediately after i'th element. + std::swap(Ops[i+1], Ops[j]); + ++i; // no need to rescan it. + if (i == e-2) return; // Done! + } + } + } +} + + + +//===----------------------------------------------------------------------===// +// Simple SCEV method implementations +//===----------------------------------------------------------------------===// + +/// BinomialCoefficient - Compute BC(It, K). The result has width W. +/// Assume K > 0. +static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, + ScalarEvolution &SE, + const Type* ResultTy) { + // Handle the simplest case efficiently. + if (K == 1) + return SE.getTruncateOrZeroExtend(It, ResultTy); + + // We are using the following formula for BC(It, K): + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! + // + // Suppose W is the bitwidth of the return value. We must be prepared for + // overflow. Hence, we must ensure that the result of our computation is + // equal to the accurate one modulo 2^W. Unfortunately, division isn't + // safe in modular arithmetic. + // + // However, this code doesn't use exactly that formula; the formula it uses + // is something like the following, where T is the number of factors of 2 in + // K! (i.e. trailing zeros in the binary representation of K!), and ^ is + // exponentiation: + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) + // + // This formula is trivially equivalent to the previous formula. However, + // this formula can be implemented much more efficiently. The trick is that + // K! / 2^T is odd, and exact division by an odd number *is* safe in modular + // arithmetic. To do exact division in modular arithmetic, all we have + // to do is multiply by the inverse. Therefore, this step can be done at + // width W. + // + // The next issue is how to safely do the division by 2^T. The way this + // is done is by doing the multiplication step at a width of at least W + T + // bits. This way, the bottom W+T bits of the product are accurate. Then, + // when we perform the division by 2^T (which is equivalent to a right shift + // by T), the bottom W bits are accurate. Extra bits are okay; they'll get + // truncated out after the division by 2^T. + // + // In comparison to just directly using the first formula, this technique + // is much more efficient; using the first formula requires W * K bits, + // but this formula uses less than W + K bits. Also, the first formula requires + // a division step, whereas this formula only requires multiplies and shifts. + // + // It doesn't matter whether the subtraction step is done in the calculation + // width or the input iteration count's width; if the subtraction overflows, + // the result must be zero anyway. We prefer here to do it in the width of + // the induction variable because it helps a lot for certain cases; CodeGen + // isn't smart enough to ignore the overflow, which leads to much less + // efficient code if the width of the subtraction is wider than the native + // register width. + // + // (It's possible to not widen at all by pulling out factors of 2 before + // the multiplication; for example, K=2 can be calculated as + // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires + // extra arithmetic, so it's not an obvious win, and it gets + // much more complicated for K > 3.)
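+ // Worked example of the scheme above: K = 3, W = 8. 3! = 6 = 2^1 * 3, so + // T = 1 and the odd factor is 3. The product It*(It-1)*(It-2) is computed + // at W+T = 9 bits, shifted right by T, truncated to 8 bits, and multiplied + // by 171, the multiplicative inverse of 3 mod 2^8 (3 * 171 = 513 = 1 (mod 256)). + // For It = 4: 4*3*2 = 24, 24 >> 1 = 12, and 12 * 171 = 2052 = 4 (mod 256), + // matching BC(4, 3) = 4.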
+ + // Protection from insane SCEVs; this bound is conservative, + // but it probably doesn't matter. + if (K > 1000) + return SE.getCouldNotCompute(); + + unsigned W = SE.getTypeSizeInBits(ResultTy); + + // Calculate K! / 2^T and T; we divide out the factors of two before + // multiplying for calculating K! / 2^T to avoid overflow. + // Other overflow doesn't matter because we only care about the bottom + // W bits of the result. + APInt OddFactorial(W, 1); + unsigned T = 1; + for (unsigned i = 3; i <= K; ++i) { + APInt Mult(W, i); + unsigned TwoFactors = Mult.countTrailingZeros(); + T += TwoFactors; + Mult = Mult.lshr(TwoFactors); + OddFactorial *= Mult; + } + + // We need at least W + T bits for the multiplication step + unsigned CalculationBits = W + T; + + // Calculate 2^T, at width T+W. + APInt DivFactor = APInt(CalculationBits, 1).shl(T); + + // Calculate the multiplicative inverse of K! / 2^T; + // this multiplication factor will perform the exact division by + // K! / 2^T. + APInt Mod = APInt::getSignedMinValue(W+1); + APInt MultiplyFactor = OddFactorial.zext(W+1); + MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); + MultiplyFactor = MultiplyFactor.trunc(W); + + // Calculate the product, at width T+W + const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + CalculationBits); + const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + for (unsigned i = 1; i != K; ++i) { + const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); + Dividend = SE.getMulExpr(Dividend, + SE.getTruncateOrZeroExtend(S, CalculationTy)); + } + + // Divide by 2^T + const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + + // Truncate the result, and divide by K! / 2^T. + + return SE.getMulExpr(SE.getConstant(MultiplyFactor), + SE.getTruncateOrZeroExtend(DivResult, ResultTy)); +} + +/// evaluateAtIteration - Return the value of this chain of recurrences at +/// the specified iteration number. We can evaluate this recurrence by +/// multiplying each element in the chain by the binomial coefficient +/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as: +/// +/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) +/// +/// where BC(It, k) stands for binomial coefficient. +/// +const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + ScalarEvolution &SE) const { + const SCEV *Result = getStart(); + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + // The computation is correct in the face of overflow provided that the + // multiplication is performed _after_ the evaluation of the binomial + // coefficient. 
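+ // For example, {5,+,3,+,2} evaluates to 5 + 3*BC(It,1) + 2*BC(It,2), + // which simplifies to 5 + 3*It + It*(It-1).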
+ const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); + if (isa<SCEVCouldNotCompute>(Coeff)) + return Coeff; + + Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); + } + return Result; +} + +//===----------------------------------------------------------------------===// +// SCEV Expression folder implementations +//===----------------------------------------------------------------------===// + +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) + return getTruncateExpr(ST->getOperand(), Ty); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) + return getTruncateOrSignExtend(SS->getOperand(), Ty); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + + // If the input value is a chrec scev, truncate the chrec's operands. + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + return getAddRecExpr(Operands, AddRec->getLoop()); + } + + // As a special case, fold trunc(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op)) + if (isa<UndefValue>(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // The cast wasn't folded; create an explicit cast node. We can reuse + // the existing insert position since if we get here, we won't have + // made any changes which would invalidate it. + SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // zext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. 
+ FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can zero extend all of the + // operands (often constants). This allows analysis of something like + // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoUnsignedWrap()) + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the latter case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly cast to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. + const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, SMul); + OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe.
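+ // For example, with Step == 1 the bound below is N = 0 - 1 == UINT_MAX, + // so a backedge guard of the form AR <u UINT_MAX proves that AR+1 never + // wraps, and the zero extension can be pushed inside the addrec.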
+ if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - + getUnsignedRange(Step).getUnsignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // sext(sext(x)) --> sext(x) + if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) + return getSignExtendExpr(SS->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scSignExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can sign extend all of the + // operands (often constants). This allows analysis of something like + // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoSignedWrap()) + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. 
In the latter case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly cast to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - + getSignedRange(Step).getSignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated.
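+ // (The recursive calls above may have inserted new SCEVs into UniqueSCEVs, + // which is what can invalidate the insert position computed earlier.)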
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +/// getAnyExtendExpr - Return a SCEV for the given operand extended with +/// unspecified bits out to the given type. +/// +const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Sign-extend negative constants. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + if (SC->getValue()->getValue().isNegative()) + return getSignExtendExpr(Op, Ty); + + // Peel off a truncate cast. + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { + const SCEV *NewOp = T->getOperand(); + if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) + return getAnyExtendExpr(NewOp, Ty); + return getTruncateOrNoop(NewOp, Ty); + } + + // Next try a zext cast. If the cast is folded, use it. + const SCEV *ZExt = getZeroExtendExpr(Op, Ty); + if (!isa<SCEVZeroExtendExpr>(ZExt)) + return ZExt; + + // Next try a sext cast. If the cast is folded, use it. + const SCEV *SExt = getSignExtendExpr(Op, Ty); + if (!isa<SCEVSignExtendExpr>(SExt)) + return SExt; + + // Force the cast to be folded into the operands of an addrec. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { + SmallVector<const SCEV *, 4> Ops; + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + Ops.push_back(getAnyExtendExpr(*I, Ty)); + return getAddRecExpr(Ops, AR->getLoop()); + } + + // As a special case, fold anyext(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op)) + if (isa<UndefValue>(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // If the expression is obviously signed, use the sext cast value. + if (isa<SCEVSMaxExpr>(Op)) + return SExt; + + // Absent any other information, use the zext cast value. + return ZExt; +} + +/// CollectAddOperandsWithScales - Process the given Ops list, which is +/// a list of operands to be added under the given scale, update the given +/// map. This is a helper function for getAddExpr. As an example of +/// what it does, given a sequence of operands that would form an add +/// expression like this: +/// +/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// +/// where A and B are constants, update the map with these values: +/// +/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) +/// +/// and add 13 + A*B*29 to AccumulatedConstant. +/// This will allow getAddExpr to produce this: +/// +/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) +/// +/// This form often exposes folding opportunities that are hidden in +/// the original operand list. +/// +/// Return true iff it appears that any interesting folding opportunities +/// may be exposed. This helps getAddExpr short-circuit extra work in +/// the common case where no interesting opportunities are present, and +/// is also used as a check to avoid infinite recursion.
+/// +static bool +CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, + SmallVector<const SCEV *, 8> &NewOps, + APInt &AccumulatedConstant, + const SCEV *const *Ops, size_t NumOperands, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); + if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Add->op_begin(), Add->getNumOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Key, NewScale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else { + // An ordinary operand. Update the map. + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], Scale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + +/// getAddExpr - Get a canonical add expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, + bool HasNUW, bool HasNSW) { + assert(!Ops.empty() && "Cannot get empty add!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVAddExpr operand types don't match!"); +#endif + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. 
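+ // For example, getAddExpr({1, 2, X}) folds the two leading constants to + // {3, X} below, and a leftover constant zero is stripped off entirely.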
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; + Ops.erase(Ops.begin()+1); // Erase the folded element + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant zero being added, strip it off. + if (LHSC->getValue()->isZero()) { + Ops.erase(Ops.begin()); + --Idx; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Okay, check to see if the same value occurs in the operand list more than + // once. If so, merge them together into a multiply expression. Since we + // sorted the list, these values are required to be adjacent. + const Type *Ty = Ops[0]->getType(); + bool FoundMatch = false; + for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) + if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 + // Scan ahead to count how many equal operands there are. + unsigned Count = 2; + while (i+Count != e && Ops[i+Count] == Ops[i]) + ++Count; + // Merge the values into a multiply. + const SCEV *Scale = getConstant(Ty, Count); + const SCEV *Mul = getMulExpr(Scale, Ops[i]); + if (Ops.size() == Count) + return Mul; + Ops[i] = Mul; + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); + --i; e -= Count - 1; + FoundMatch = true; + } + if (FoundMatch) + return getAddExpr(Ops, HasNUW, HasNSW); + + // Check for truncates. If all the operands are truncated from the same + // type, see if factoring out the truncate would permit the result to be + // folded. e.g., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // if the contents of the resulting outer trunc fold to something simple. + for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); + const Type *DstType = Trunc->getType(); + const Type *SrcType = Trunc->getOperand()->getType(); + SmallVector<const SCEV *, 8> LargeOps; + bool Ok = true; + // Check all the operands to see if they can be represented in the + // source type of the truncate. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); + } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { + SmallVector<const SCEV *, 8> LargeMulOps; + for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { + if (const SCEVTruncateExpr *T = + dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeMulOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = + dyn_cast<SCEVConstant>(M->getOperand(j))) { + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); + } else { + Ok = false; + break; + } + } + if (Ok) + LargeOps.push_back(getMulExpr(LargeMulOps)); + } else { + Ok = false; + break; + } + } + if (Ok) { + // Evaluate the expression in the larger type. + const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW); + // If it folds to something simple, use it. Otherwise, don't.
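+ // For example, trunc(X) + (-1 * trunc(X)) re-associates in the source + // type to X + (-1 * X), which folds to the constant 0 and is accepted here.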
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) + return getTruncateExpr(Fold, DstType); + } + } + + // Skip past any other cast SCEVs. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) + ++Idx; + + // If there are add operands they would be next. + if (Idx < Ops.size()) { + bool DeletedAdd = false; + while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { + // If we have an add, expand the add operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); + DeletedAdd = true; + } + + // If we deleted at least one add, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedAdd) + return getAddExpr(Ops); + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. + if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap<const SCEV *, APInt> M; + SmallVector<const SCEV *, 8> NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so it's worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; + for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. + Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator + I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), + getAddExpr(I->second))); + if (Ops.empty()) + return getConstant(Ty, 0); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + + // If we are adding something to a multiply expression, make sure the + // something is not already an operand of the multiply. If so, merge it into + // the multiply. + for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) { + const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); + for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { + const SCEV *MulOpSCEV = Mul->getOperand(MulOp); + if (isa<SCEVConstant>(MulOpSCEV)) + continue; + for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) + if (MulOpSCEV == Ops[AddOp]) { + // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + // If the multiply has more than two operands, we must get the + // Y*Z term.
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul = getMulExpr(MulOps); + } + const SCEV *One = getConstant(Ty, 1); + const SCEV *AddOne = getAddExpr(One, InnerMul); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + if (Ops.size() == 2) return OuterMul; + if (AddOp < Idx) { + Ops.erase(Ops.begin()+AddOp); + Ops.erase(Ops.begin()+Idx-1); + } else { + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+AddOp-1); + } + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + + // Check this multiply against other multiplies being added together. + for (unsigned OtherMulIdx = Idx+1; + OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]); + ++OtherMulIdx) { + const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]); + // If MulOp occurs in OtherMul, we can fold the two multiplies + // together. + for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); + OMulOp != e; ++OMulOp) + if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { + // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul1 = getMulExpr(MulOps); + } + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); + if (OtherMul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), + OtherMul->op_begin()+OMulOp); + MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); + InnerMul2 = getMulExpr(MulOps); + } + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + if (Ops.size() == 2) return OuterMul; + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+OtherMulIdx-1); + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + } + } + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this add and add them to the vector if + // they are loop invariant w.r.t. the recurrence. + SmallVector<const SCEV *, 8> LIOps; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Ops[i]->isLoopInvariant(AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} + LIOps.push_back(AddRec->getStart()); + + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + AddRecOps[0] = getAddExpr(LIOps); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); + + // If all of the other operands were loop invariant, we are done. 
+ if (Ops.size() == 1) return NewRec; + + // Otherwise, add the folded AddRec to the non-loop-invariant parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getAddExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // added together. If so, we can fold them. + for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; + } + AddRecOps[i] = getAddExpr(AddRecOps[i], + OtherAddRec->getOperand(i)); + } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop); + return getAddExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an add expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAddExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVAddExpr *S = + static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +/// getMulExpr - Get a canonical multiply expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, + bool HasNUW, bool HasNSW) { + assert(!Ops.empty() && "Cannot get empty mul!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVMulExpr operand types don't match!"); +#endif + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together.
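+ // For example, getMulExpr({4, 5, X}) folds the leading constants to + // {20, X} below; a leftover factor of 1 is stripped, and a factor of 0 + // collapses the whole product to 0.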
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + + // C1*(C2+V) -> C1*C2 + C1*V + if (Ops.size() == 2) + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) + if (Add->getNumOperands() == 2 && + isa<SCEVConstant>(Add->getOperand(0))) + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), + getMulExpr(LHSC, Add->getOperand(1))); + + ++Idx; + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + LHSC->getValue()->getValue() * + RHSC->getValue()->getValue()); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant one being multiplied, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { + // If we have a multiply of zero, it will always be zero. + return Ops[0]; + } else if (Ops[0]->isAllOnesValue()) { + // If we have a mul by -1 of an add, try distributing the -1 among the + // add operands. + if (Ops.size() == 2) + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { + SmallVector<const SCEV *, 4> NewOps; + bool AnyFolded = false; + for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Mul = getMulExpr(Ops[0], *I); + if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; + NewOps.push_back(Mul); + } + if (AnyFolded) + return getAddExpr(NewOps); + } + } + + if (Ops.size() == 1) + return Ops[0]; + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // If there are mul operands inline them all into this expression. + if (Idx < Ops.size()) { + bool DeletedMul = false; + while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { + // If we have a mul, expand the mul operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Mul->op_begin(), Mul->op_end()); + DeletedMul = true; + } + + // If we deleted at least one mul, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedMul) + return getMulExpr(Ops); + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this mul and add them to the vector if + // they are loop invariant w.r.t. the recurrence. + SmallVector<const SCEV *, 8> LIOps; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Ops[i]->isLoopInvariant(AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence.
+ if (!LIOps.empty()) { + // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} + SmallVector<const SCEV *, 4> NewOps; + NewOps.reserve(AddRec->getNumOperands()); + const SCEV *Scale = getMulExpr(LIOps); + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer mul and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); + + // If all of the other operands were loop invariant, we are done. + if (Ops.size() == 1) return NewRec; + + // Otherwise, multiply the folded AddRec by the non-loop-invariant parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getMulExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // multiplied together. If so, we can fold them. + for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> --> + // {A*C,+,F*D + G*B + B*D}<L> + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), + getMulExpr(G, B), + getMulExpr(B, D)); + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, + F->getLoop()); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + return getMulExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need a mul expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVMulExpr *S = + static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +/// getUDivExpr - Get a canonical unsigned division expression, or something +/// simpler if possible. +const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, + const SCEV *RHS) { + assert(getEffectiveSCEVType(LHS->getType()) == + getEffectiveSCEVType(RHS->getType()) && + "SCEVUDivExpr operand types don't match!"); + + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (RHSC->getValue()->equalsInt(1)) + return LHS; // X udiv 1 --> X + // If the denominator is zero, the result of the udiv is undefined.
Don't + // try to analyze it, because the resolution chosen here may differ from + // the resolution chosen in other parts of the compiler. + if (!RHSC->getValue()->isZero()) { + // Determine if the division can be folded into the operands of + // its operands. + // TODO: Generalize this to non-constants by using known-bits information. + const Type *Ty = LHS->getType(); + unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; + // For non-power-of-two values, effectively round the value up to the + // nearest power of two. + if (!RHSC->getValue()->getValue().isPowerOf2()) + ++MaxShiftAmt; + const IntegerType *ExtTy = + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (const SCEVConstant *Step = + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) + if (!Step->getValue()->getValue() + .urem(RHSC->getValue()->getValue()) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop())) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) + Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); + return getAddRecExpr(Operands, AR->getLoop()); + } + // (A*B)/C --> A*(B/C) if safe and B/C can be folded. + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) + // Find an operand that's safely divisible. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); + if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { + Operands = SmallVector<const SCEV *, 4>(M->op_begin(), + M->op_end()); + Operands[i] = Div; + return getMulExpr(Operands); + } + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { + Operands.clear(); + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); + if (isa<SCEVUDivExpr>(Op) || + getMulExpr(Op, RHS) != A->getOperand(i)) + break; + Operands.push_back(Op); + } + if (Operands.size() == A->getNumOperands()) + return getAddExpr(Operands); + } + } + + // Fold if both operands are constant. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + Constant *LHSCV = LHSC->getValue(); + Constant *RHSCV = RHSC->getValue(); + return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, + RHSCV))); + } + } + } + + FoldingSetNodeID ID; + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), + LHS, RHS); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. 
+/// Simplify the expression as much as possible. +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, + const SCEV *Step, const Loop *L, + bool HasNUW, bool HasNSW) { + SmallVector<const SCEV *, 4> Operands; + Operands.push_back(Start); + if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) + if (StepChrec->getLoop() == L) { + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); + return getAddRecExpr(Operands, L); + } + + Operands.push_back(Step); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); +} + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. +const SCEV * +ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, + bool HasNUW, bool HasNSW) { + if (Operands.size() == 1) return Operands[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && + "SCEVAddRecExpr operand types don't match!"); +#endif + + if (Operands.back()->isZero()) { + Operands.pop_back(); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X + } + + // It's tempting to want to call getMaxBackedgeTakenCount here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(), + E = Operands.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Canonicalize nested AddRecs by nesting them in order of loop depth. + if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { + const Loop *NestedLoop = NestedAR->getLoop(); + if (L->contains(NestedLoop) ? + (L->getLoopDepth() < NestedLoop->getLoopDepth()) : + (!NestedLoop->contains(L) && + DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); + Operands[0] = NestedAR->getStart(); + // AddRecs require their operands be loop-invariant with respect to their + // loops. Don't perform this transformation if it would break this + // requirement. + bool AllInvariant = true; + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (!Operands[i]->isLoopInvariant(L)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + NestedOperands[0] = getAddRecExpr(Operands, L); + AllInvariant = true; + for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) + if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) { + AllInvariant = false; + break; + } + if (AllInvariant) + // Ok, both add recurrences are valid after the transformation. + return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); + } + // Reset Operands to its original state. + Operands[0] = NestedAR; + } + } + + // Okay, it looks like we really DO need an addrec expr. Check to see if we + // already have one, otherwise create a new one.
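+ // For example, with Inner nested inside Outer, the canonicalization above + // rewrites {{A,+,B}<Inner>,+,C}<Outer> as {{A,+,C}<Outer>,+,B}<Inner>, + // provided every operand stays invariant in its respective loop.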
+ FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddInteger(Operands.size()); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); + void *IP = 0; + SCEVAddRecExpr *S = + static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getSMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty smax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVSMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::smax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) { + // If we have an smax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first SMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedSMax = false; + while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); + DeletedSMax = true; + } + + if (DeletedSMax) + return getSMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. 
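+  // For example (illustrative): a sorted operand list {%a, %a, %b} loses the
+  // duplicate %a below, and {5, %n} collapses to just %n when %n is provably
+  // >=s 5.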
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced smax down to nothing!"); + + // Okay, it looks like we really DO need an smax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scSMaxExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getUMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty umax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVUMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::umax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) { + // If we have an umax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first UMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) + ++Idx; + + // Check to see if one of the operands is a UMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedUMax = false; + while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); + DeletedUMax = true; + } + + if (DeletedUMax) + return getUMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. 
Since we sorted the list, these values are required to + // be adjacent. + for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced umax down to nothing!"); + + // Okay, it looks like we really DO need a umax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scUMaxExpr); + ID.AddInteger(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~smax(~x, ~y) == smin(x, y). + return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~umax(~x, ~y) == umin(x, y) + return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getTypeAllocSize(AllocTy)); + + Constant *C = ConstantExpr::getSizeOf(AllocTy); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { + Constant *C = ConstantExpr::getAlignOf(AllocTy); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, + unsigned FieldNo) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. 
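+  // Illustrative example (hypothetical struct, typical layout): for
+  //   struct S { i8 a; i32 b; };
+  // with i32 aligned to 4 bytes, getElementOffset(1) is 4, so the fast path
+  // below yields the SCEV constant 4 at pointer width directly instead of
+  // building a ConstantExpr and folding it.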
+  if (TD)
+    return getConstant(TD->getIntPtrType(getContext()),
+                       TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+                                             Constant *FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+  // Don't attempt to do anything other than create a SCEVUnknown object
+  // here. createSCEV only calls getUnknown after checking for all other
+  // interesting possibilities, and any other code that calls getUnknown
+  // is doing so in order to hide a value from SCEV canonicalization.
+
+  FoldingSetNodeID ID;
+  ID.AddInteger(scUnknown);
+  ID.AddPointer(V);
+  void *IP = 0;
+  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+    assert(cast<SCEVUnknown>(S)->getValue() == V &&
+           "Stale SCEVUnknown in uniquing map!");
+    return S;
+  }
+  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+                                            FirstUnknown);
+  FirstUnknown = cast<SCEVUnknown>(S);
+  UniqueSCEVs.InsertNode(S, IP);
+  return S;
+}
+
+//===----------------------------------------------------------------------===//
+//            Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+  // Integers and pointers are always SCEVable.
+  return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  // If we have a TargetData, use it!
+  if (TD)
+    return TD->getTypeSizeInBits(Ty);
+
+  // Integer types have fixed sizes.
+  if (Ty->isIntegerTy())
+    return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is pointer. Without TargetData,
+  // conservatively assume pointers are 64-bit.
+  assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+  return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  if (Ty->isIntegerTy())
+    return Ty;
+
+  // The only other supported type is pointer.
+  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+  if (TD) return TD->getIntPtrType(getContext());
+
+  // Without TargetData, conservatively assume pointers are 64-bit.
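+  // Minimal usage sketch (SE and P are hypothetical names): for an i8* value,
+  //   const Type *Ty = SE.getEffectiveSCEVType(P->getType());
+  // returns the target's intptr type when TargetData is available, or falls
+  // back to the conservative i64 below when it is not.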
+  return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+  return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+  ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+  if (I != ValueExprMap.end()) return I->second;
+  const SCEV *S = createSCEV(V);
+
+  // The process of creating a SCEV for V may have caused other SCEVs
+  // to have been created, so it's necessary to insert the new entry
+  // from scratch, rather than trying to remember the insert position
+  // above.
+  ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+  return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getConstant(
+               cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  return getMulExpr(V,
+                  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getConstant(
+                cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  const SCEV *AllOnes =
+                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+  return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
+///
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
+                                          const SCEV *RHS) {
+  // Fast path: X - X --> 0.
+  if (LHS == RHS)
+    return getConstant(LHS->getType(), 0);
+
+  // X - Y --> X + -Y
+  return getAddExpr(LHS, getNegativeSCEV(RHS));
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is zero
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
+                                         const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or zero extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is sign
+/// extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+                                         const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or sign extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. 
If the type must be extended, it is zero +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or zero extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrZeroExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getZeroExtendExpr(V, Ty); +} + +/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or sign extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrSignExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of +/// the input value to the specified type. If the type must be extended, +/// it is extended with unspecified bits. The conversion must not be +/// narrowing. +const SCEV * +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or any extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrAnyExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getAnyExtendExpr(V, Ty); +} + +/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. The conversion must not be widening. +const SCEV * +ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or noop with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && + "getTruncateOrNoop cannot extend!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getTruncateExpr(V, Ty); +} + +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. +const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + +/// getUMinFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umin operation +/// with them. 
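+/// For example (illustrative), umin of an i32 %a and an i64 %b is computed
+/// as umin(zext i32 %a to i64, %b), which getUMinExpr in turn reduces to
+/// ~umax(~zext(%a), ~%b).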
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+                                                        const SCEV *RHS) {
+  const SCEV *PromotedLHS = LHS;
+  const SCEV *PromotedRHS = RHS;
+
+  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+  else
+    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+  return getUMinExpr(PromotedLHS, PromotedRHS);
+}
+
+/// PushDefUseChildren - Push users of the given Instruction
+/// onto the given Worklist.
+static void
+PushDefUseChildren(Instruction *I,
+                   SmallVectorImpl<Instruction *> &Worklist) {
+  // Push the def-use children onto the Worklist stack.
+  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+       UI != UE; ++UI)
+    Worklist.push_back(cast<Instruction>(*UI));
+}
+
+/// ForgetSymbolicName - This looks up computed SCEV values for all
+/// instructions that depend on the given instruction and removes them from
+/// the ValueExprMap map if they reference SymName. This is used during PHI
+/// resolution.
+void
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
+  SmallVector<Instruction *, 16> Worklist;
+  PushDefUseChildren(PN, Worklist);
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  Visited.insert(PN);
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (!Visited.insert(I)) continue;
+
+    ValueExprMapType::iterator It =
+      ValueExprMap.find(static_cast<Value *>(I));
+    if (It != ValueExprMap.end()) {
+      // Short-circuit the def-use traversal if the symbolic name
+      // ceases to appear in expressions.
+      if (It->second != SymName && !It->second->hasOperand(SymName))
+        continue;
+
+      // SCEVUnknown for a PHI either means that it has an unrecognized
+      // structure, it's a PHI that's in the process of being computed
+      // by createNodeForPHI, or it's a single-value PHI. In the first case,
+      // additional loop trip count information isn't going to change anything.
+      // In the second case, createNodeForPHI will perform the necessary
+      // updates on its own when it gets to that point. In the third, we do
+      // want to forget the SCEVUnknown.
+      if (!isa<PHINode>(I) ||
+          !isa<SCEVUnknown>(It->second) ||
+          (I != PN && It->second == SymName)) {
+        ValuesAtScopes.erase(It->second);
+        ValueExprMap.erase(It);
+      }
+    }
+
+    PushDefUseChildren(I, Worklist);
+  }
+}
+
+/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in
+/// a loop header, making it a potential recurrence, or it doesn't.
+///
+const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
+  if (const Loop *L = LI->getLoopFor(PN->getParent()))
+    if (L->getHeader() == PN->getParent()) {
+      // The loop may have multiple entrances or multiple exits; we can analyze
+      // this phi as an addrec if it has a unique entry value and a unique
+      // backedge value.
+      Value *BEValueV = 0, *StartValueV = 0;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        Value *V = PN->getIncomingValue(i);
+        if (L->contains(PN->getIncomingBlock(i))) {
+          if (!BEValueV) {
+            BEValueV = V;
+          } else if (BEValueV != V) {
+            BEValueV = 0;
+            break;
+          }
+        } else if (!StartValueV) {
+          StartValueV = V;
+        } else if (StartValueV != V) {
+          StartValueV = 0;
+          break;
+        }
+      }
+      if (BEValueV && StartValueV) {
+        // While we are analyzing this PHI node, handle its value symbolically.
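+        // Concretely (illustrative IR): for the canonical induction variable
+        //   %i      = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+        //   %i.next = add i32 %i, 1
+        // the symbolic name stands in for %i while %i.next is analyzed; the
+        // add is then seen as SymbolicName + 1, and the PHI is recognized
+        // below as the recurrence {0,+,1}<%loop>.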
+        const SCEV *SymbolicName = getUnknown(PN);
+        assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+               "PHI node already processed?");
+        ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+
+        // Using this symbolic name for the PHI, analyze the value coming around
+        // the back-edge.
+        const SCEV *BEValue = getSCEV(BEValueV);
+
+        // NOTE: If BEValue is loop invariant, we know that the PHI node just
+        // has a special value for the first iteration of the loop.
+
+        // If the value coming around the backedge is an add with the symbolic
+        // value we just inserted, then we found a simple induction variable!
+        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
+          // If there is a single occurrence of the symbolic value, replace it
+          // with a recurrence.
+          unsigned FoundIndex = Add->getNumOperands();
+          for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+            if (Add->getOperand(i) == SymbolicName)
+              if (FoundIndex == e) {
+                FoundIndex = i;
+                break;
+              }
+
+          if (FoundIndex != Add->getNumOperands()) {
+            // Create an add with everything but the specified operand.
+            SmallVector<const SCEV *, 8> Ops;
+            for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
+              if (i != FoundIndex)
+                Ops.push_back(Add->getOperand(i));
+            const SCEV *Accum = getAddExpr(Ops);
+
+            // This is not a valid addrec if the step amount is varying each
+            // loop iteration, but is not itself an addrec in this loop.
+            if (Accum->isLoopInvariant(L) ||
+                (isa<SCEVAddRecExpr>(Accum) &&
+                 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+              bool HasNUW = false;
+              bool HasNSW = false;
+
+              // If the increment doesn't overflow, then neither the addrec nor
+              // the post-increment will overflow.
+              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+                if (OBO->hasNoUnsignedWrap())
+                  HasNUW = true;
+                if (OBO->hasNoSignedWrap())
+                  HasNSW = true;
+              }
+
+              const SCEV *StartVal = getSCEV(StartValueV);
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
+
+              // Since the no-wrap flags are on the increment, they apply to the
+              // post-incremented value as well.
+              if (Accum->isLoopInvariant(L))
+                (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+                                    Accum, L, HasNUW, HasNSW);
+
+              // Okay, for the entire analysis of this edge we assumed the PHI
+              // to be symbolic. We now need to go back and purge all of the
+              // entries for the scalars that use the symbolic expression.
+              ForgetSymbolicName(PN, SymbolicName);
+              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+              return PHISCEV;
+            }
+          }
+        } else if (const SCEVAddRecExpr *AddRec =
+                     dyn_cast<SCEVAddRecExpr>(BEValue)) {
+          // Otherwise, this could be a loop like this:
+          //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
+          // In this case, j = {1,+,1} and BEValue is j.
+          // Because the other in-value of i (0) fits the evolution of BEValue,
+          // i really is an addrec evolution.
+          if (AddRec->getLoop() == L && AddRec->isAffine()) {
+            const SCEV *StartVal = getSCEV(StartValueV);
+
+            // If StartVal = j.start - j.stride, we can use StartVal as the
+            // initial value of the addrec evolution.
+            if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+                                         AddRec->getOperand(1))) {
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, AddRec->getOperand(1), L);
+
+              // Okay, for the entire analysis of this edge we assumed the PHI
+              // to be symbolic. We now need to go back and purge all of the
+              // entries for the scalars that use the symbolic expression.
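+              // Concretely (illustrative): with j = {1,+,1}<L> and a start
+              // value of 0 == 1 - 1 for i, i is recognized here as
+              // {0,+,1}<L> even though it is not written as a
+              // self-recurrence in the IR.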
+ ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } + } + } + + // If the PHI has a single incoming value, follow that value, unless the + // PHI's incoming blocks are in a different loop, in which case doing so + // risks breaking LCSSA form. Instcombine would normally zap these, but + // it doesn't have DominatorTree information, so it may miss cases. + if (Value *V = PN->hasConstantValue(DT)) { + bool AllSameLoop = true; + Loop *PNLoop = LI->getLoopFor(PN->getParent()); + for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) { + AllSameLoop = false; + break; + } + if (AllSameLoop) + return getSCEV(V); + } + + // If it's not a loop phi, we can't handle it yet. + return getUnknown(PN); +} + +/// createNodeForGEP - Expand GEP instructions into add and multiply +/// operations. This allows them to be analyzed by regular SCEV code. +/// +const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + + // Don't blindly transfer the inbounds flag from the GEP instruction to the + // Add expression, because the Instruction may be guarded by control flow + // and the no-overflow bits may not be valid for the expression in any + // context. + + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. + if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) + return getUnknown(GEP); + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + gep_type_iterator GTI = gep_type_begin(GEP); + for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), + E = GEP->op_end(); + I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); + } else { + // For an array, add the element offset, explicitly scaled. + const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *IndexS = getSCEV(Index); + // Getelementptr indices are signed. + IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); + } + } + + // Get the SCEV for the GEP base. + const SCEV *BaseS = getSCEV(Base); + + // Add the total offset from all the GEP indices to the base. + return getAddExpr(BaseS, TotalOffset); +} + +/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is +/// guaranteed to end in (at every loop iteration). It is, at the same time, +/// the minimum number of times S is divisible by 2. For example, given {4,+,8} +/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. 
+uint32_t +ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return C->getValue()->getValue().countTrailingZeros(); + + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) + return std::min(GetMinTrailingZeros(T->getOperand()), + (uint32_t)getTypeSizeInBits(T->getType())); + + if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + // The result is the sum of all operands results. + uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); + uint32_t BitWidth = getTypeSizeInBits(M->getType()); + for (unsigned i = 1, e = M->getNumOperands(); + SumOpRes != BitWidth && i != e; ++i) + SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), + BitWidth); + return SumOpRes; + } + + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + return Zeros.countTrailingOnes(); + } + + // SCEVUDivExpr + return 0; +} + +/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getUnsignedRange(const SCEV *S) { + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return ConstantRange(C->getValue()->getValue()); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum unsigned value will have those + // known zeros as well. 
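+  // For example (illustrative): for an i8 expression known to have two
+  // trailing zero bits, the largest representable multiple of 4 is 252, so
+  // the conservative range below becomes [0, 253).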
+ uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getMinValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getUnsignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getUnsignedRange(Add->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getUnsignedRange(Mul->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getUnsignedRange(SMax->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getUnsignedRange(UMax->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getUnsignedRange(UDiv->getLHS()); + ConstantRange Y = getUnsignedRange(UDiv->getRHS()); + return ConservativeResult.intersectWith(X.udiv(Y)); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(ZExt->getOperand()); + return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(SExt->getOperand()); + return ConservativeResult.intersectWith(X.signExtend(BitWidth)); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getUnsignedRange(Trunc->getOperand()); + return ConservativeResult.intersectWith(X.truncate(BitWidth)); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no unsigned wrap, the value will never be less than its + // initial value. + if (AddRec->hasNoUnsignedWrap()) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) + if (!C->getValue()->isZero()) + ConservativeResult = + ConservativeResult.intersectWith( + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. 
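+        // A width of 2*BitWidth+1 bits is enough to represent
+        // Start + BECount*Step exactly: the product of two BitWidth-bit
+        // quantities needs at most 2*BitWidth bits, and the final add needs
+        // one more. If the wide result disagrees with EndRange, the
+        // BitWidth-bit arithmetic wrapped.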
+ ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return ConservativeResult; + + APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), + EndRange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), + EndRange.getUnsignedMax()); + if (Min.isMinValue() && Max.isMaxValue()) + return ConservativeResult; + return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); + } + } + + return ConservativeResult; + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + if (Ones == ~Zeros + 1) + return ConservativeResult; + return ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + } + + return ConservativeResult; +} + +/// getSignedRange - Determine the signed range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getSignedRange(const SCEV *S) { + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return ConstantRange(C->getValue()->getValue()); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum signed value will have those + // known zeros as well. + uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getSignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getSignedRange(Add->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getSignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getSignedRange(Mul->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getSignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getSignedRange(SMax->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getSignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getSignedRange(UMax->getOperand(i))); + return ConservativeResult.intersectWith(X); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getSignedRange(UDiv->getLHS()); + ConstantRange Y = getSignedRange(UDiv->getRHS()); + return ConservativeResult.intersectWith(X.udiv(Y)); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getSignedRange(ZExt->getOperand()); + return ConservativeResult.intersectWith(X.zeroExtend(BitWidth)); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = 
getSignedRange(SExt->getOperand()); + return ConservativeResult.intersectWith(X.signExtend(BitWidth)); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getSignedRange(Trunc->getOperand()); + return ConservativeResult.intersectWith(X.truncate(BitWidth)); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no signed wrap, and all the operands have the same sign or + // zero, the value won't ever change sign. + if (AddRec->hasNoSignedWrap()) { + bool AllNonNeg = true; + bool AllNonPos = true; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; + if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; + } + if (AllNonNeg) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt(BitWidth, 0), + APInt::getSignedMinValue(BitWidth))); + else if (AllNonPos) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1))); + } + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getSignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return ConservativeResult; + + APInt Min = APIntOps::smin(StartRange.getSignedMin(), + EndRange.getSignedMin()); + APInt Max = APIntOps::smax(StartRange.getSignedMax(), + EndRange.getSignedMax()); + if (Min.isMinSignedValue() && Max.isMaxSignedValue()) + return ConservativeResult; + return ConservativeResult.intersectWith(ConstantRange(Min, Max+1)); + } + } + + return ConservativeResult; + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + if (!U->getValue()->getType()->isIntegerTy() && !TD) + return ConservativeResult; + unsigned NS = ComputeNumSignBits(U->getValue(), TD); + if (NS == 1) + return ConservativeResult; + return ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)); + } + + return ConservativeResult; +} + +/// createSCEV - We know that there is no SCEV for the specified value. +/// Analyze the expression. 
+/// +const SCEV *ScalarEvolution::createSCEV(Value *V) { + if (!isSCEVable(V->getType())) + return getUnknown(V); + + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast<Instruction>(V)) { + Opcode = I->getOpcode(); + + // Don't attempt to analyze instructions in blocks that aren't + // reachable. Such instructions don't matter, and they aren't required + // to obey basic rules for definitions dominating uses which this + // analysis depends on. + if (!DT->isReachableFromEntry(I->getParent())) + return getUnknown(V); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + Opcode = CE->getOpcode(); + else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return getConstant(CI); + else if (isa<ConstantPointerNull>(V)) + return getConstant(V->getType(), 0); + else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); + else + return getUnknown(V); + + Operator *U = cast<Operator>(V); + switch (Opcode) { + case Instruction::Add: { + // The simple thing to do would be to just call getSCEV on both operands + // and call getAddExpr with the result. However if we're looking at a + // bunch of things all added together, this can be quite inefficient, + // because it leads to N-1 getAddExpr calls for N ultimate operands. + // Instead, gather up all the operands and make a single getAddExpr call. + // LLVM IR canonical form means we need only traverse the left operands. + SmallVector<const SCEV *, 4> AddOps; + AddOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { + unsigned Opcode = Op->getValueID() - Value::InstructionVal; + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + break; + U = cast<Operator>(Op); + const SCEV *Op1 = getSCEV(U->getOperand(1)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getNegativeSCEV(Op1)); + else + AddOps.push_back(Op1); + } + AddOps.push_back(getSCEV(U->getOperand(0))); + return getAddExpr(AddOps); + } + case Instruction::Mul: { + // See the Add code above. + SmallVector<const SCEV *, 4> MulOps; + MulOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); + Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op = U->getOperand(0)) { + U = cast<Operator>(Op); + MulOps.push_back(getSCEV(U->getOperand(1))); + } + MulOps.push_back(getSCEV(U->getOperand(0))); + return getMulExpr(MulOps); + } + case Instruction::UDiv: + return getUDivExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::Sub: + return getMinusSCEV(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::And: + // For an expression like x&255 that merely masks off the high bits, + // use zext(trunc(x)) as the SCEV expression. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (CI->isNullValue()) + return getSCEV(U->getOperand(1)); + if (CI->isAllOnesValue()) + return getSCEV(U->getOperand(0)); + const APInt &A = CI->getValue(); + + // Instcombine's ShrinkDemandedConstant may strip bits out of + // constants, obscuring what would otherwise be a low-bits mask. + // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // knew about to reconstruct a low-bits mask value. 
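+      // Worked example (illustrative): suppose the expression was originally
+      // %x & 255, but ShrinkDemandedConstant turned the constant into 250
+      // because bits 0 and 2 of %x are known zero. The reconstruction below
+      // still finds an effective low-8-bit mask, so the expression is modeled
+      // as zext(trunc(%x to i8)) rather than left unanalyzed.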
+ unsigned LZ = A.countLeadingZeros(); + unsigned BitWidth = A.getBitWidth(); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + + APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); + + if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) + return + getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), + IntegerType::get(getContext(), BitWidth - LZ)), + U->getType()); + } + break; + + case Instruction::Or: + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); + if (OldAR->hasNoUnsignedWrap()) + const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true); + if (OldAR->hasNoSignedWrap()) + const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true); + } + return S; + } + } + break; + case Instruction::Xor: + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + // If the RHS of the xor is a signbit, then this is just an add. + // Instcombine turns add of signbit into xor as a strength reduction step. + if (CI->getValue().isSignBit()) + return getAddExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + + // If the RHS of xor is -1, then this is a not operation. + if (CI->isAllOnesValue()) + return getNotSCEV(getSCEV(U->getOperand(0))); + + // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. + // This is a variant of the check for xor with -1, and it handles + // the case where instcombine has trimmed non-demanded bits out + // of an xor with -1. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) + if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO->getOpcode() == Instruction::And && + LCI->getValue() == CI->getValue()) + if (const SCEVZeroExtendExpr *Z = + dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { + const Type *UTy = U->getType(); + const SCEV *Z0 = Z->getOperand(); + const Type *Z0Ty = Z0->getType(); + unsigned Z0TySize = getTypeSizeInBits(Z0Ty); + + // If C is a low-bits mask, the zero extend is serving to + // mask off the high bits. Complement the operand and + // re-apply the zext. + if (APIntOps::isMask(Z0TySize, CI->getValue())) + return getZeroExtendExpr(getNotSCEV(Z0), UTy); + + // If C is a single bit, it may be in the sign-bit position + // before the zero-extend. In this case, represent the xor + // using an add, which is equivalent, and re-apply the zext. 
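+              // E.g. (illustrative): if the operand was modeled as
+              // zext(V to i32) for an i8 value V and C is 128, the xor flips
+              // V's sign bit, so the result equals zext(add(V, 128) to i32),
+              // with the add wrapping modulo 2^8.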
+              APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
+              if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+                  Trunc.isSignBit())
+                return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+                                         UTy);
+            }
+    }
+    break;
+
+  case Instruction::Shl:
+    // Turn shift left of a constant amount into a multiply.
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+      // If the shift count is not less than the bitwidth, the result of
+      // the shift is undefined. Don't try to analyze it, because the
+      // resolution chosen here may differ from the resolution chosen in
+      // other parts of the compiler.
+      if (SA->getValue().uge(BitWidth))
+        break;
+
+      Constant *X = ConstantInt::get(getContext(),
+        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::LShr:
+    // Turn logical shift right of a constant into an unsigned divide.
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+      // If the shift count is not less than the bitwidth, the result of
+      // the shift is undefined. Don't try to analyze it, because the
+      // resolution chosen here may differ from the resolution chosen in
+      // other parts of the compiler.
+      if (SA->getValue().uge(BitWidth))
+        break;
+
+      Constant *X = ConstantInt::get(getContext(),
+        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+      return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::AShr:
+    // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+      if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+        if (L->getOpcode() == Instruction::Shl &&
+            L->getOperand(1) == U->getOperand(1)) {
+          uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+          // If the shift count is not less than the bitwidth, the result of
+          // the shift is undefined. Don't try to analyze it, because the
+          // resolution chosen here may differ from the resolution chosen in
+          // other parts of the compiler.
+          if (CI->getValue().uge(BitWidth))
+            break;
+
+          uint64_t Amt = BitWidth - CI->getZExtValue();
+          if (Amt == BitWidth)
+            return getSCEV(L->getOperand(0));       // shift by zero --> noop
+          return
+            getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+                                              IntegerType::get(getContext(),
+                                                               Amt)),
+                              U->getType());
+        }
+    break;
+
+  case Instruction::Trunc:
+    return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::ZExt:
+    return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::SExt:
+    return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::BitCast:
+    // BitCasts are no-op casts so we just eliminate the cast.
+    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+      return getSCEV(U->getOperand(0));
+    break;
+
+  // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+  // lead to pointer expressions which cannot safely be expanded to GEPs,
+  // because ScalarEvolution doesn't respect the GEP aliasing rules when
+  // simplifying integer expressions.
+
+  case Instruction::GetElementPtr:
+    return createNodeForGEP(cast<GEPOperator>(U));
+
+  case Instruction::PHI:
+    return createNodeForPHI(cast<PHINode>(U));
+
+  case Instruction::Select:
+    // This could be a smax or umax that was lowered earlier.
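+    // For instance (illustrative), %s = select (icmp sgt %a, %b), %a, %b is
+    // a lowered smax(%a, %b), and adding equal offsets to both arms keeps it
+    // recognizable as smax(%a, %b) plus that offset.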
+ // Try to recover it. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? b+x : a+x -> smin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + } + + default: // We cannot analyze this expression. + break; + } + + return getUnknown(V); +} + + + +//===----------------------------------------------------------------------===// +// Iteration Count Computation Code +// + +/// getBackedgeTakenCount - If the specified loop has a predictable +/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute +/// object. The backedge-taken count is the number of times the loop header +/// will be branched to from within the loop. This is one less than the +/// trip count of the loop, since it doesn't count the first iteration, +/// when the header is branched to from outside the loop. +/// +/// Note that it is not valid to call this method on a loop without a +/// loop-invariant backedge-taken count (see +/// hasLoopInvariantBackedgeTakenCount). 
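+/// (Illustrative example: a loop whose header executes ten times has a
+/// backedge-taken count of nine, nine arrivals via the backedge plus the
+/// initial entry from outside.)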
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Max;
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+  BasicBlock *Header = L->getHeader();
+
+  // Push all Loop-header PHIs onto the Worklist stack.
+  for (BasicBlock::iterator I = Header->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // succeeds, proceed to actually compute a backedge-taken count and
+  // update the value. The temporary CouldNotCompute value tells SCEV
+  // code elsewhere that it shouldn't attempt to request a new
+  // backedge-taken count, which could result in infinite recursion.
+  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+  if (Pair.second) {
+    BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
+    if (BECount.Exact != getCouldNotCompute()) {
+      assert(BECount.Exact->isLoopInvariant(L) &&
+             BECount.Max->isLoopInvariant(L) &&
+             "Computed backedge-taken count isn't loop invariant for loop!");
+      ++NumTripCountsComputed;
+
+      // Update the value in the map.
+      Pair.first->second = BECount;
+    } else {
+      if (BECount.Max != getCouldNotCompute())
+        // Update the value in the map.
+        Pair.first->second = BECount;
+      if (isa<PHINode>(L->getHeader()->begin()))
+        // Only count loops that have phi nodes as not being computable.
+        ++NumTripCountsNotComputed;
+    }
+
+    // Now that we know more about the trip count for this loop, forget any
+    // existing SCEV values for PHI nodes in this loop since they are only
+    // conservative estimates made without the benefit of trip count
+    // information. This is similar to the code in forgetLoop, except that
+    // it handles SCEVUnknown PHI nodes specially.
+    if (BECount.hasAnyInfo()) {
+      SmallVector<Instruction *, 16> Worklist;
+      PushLoopPHIs(L, Worklist);
+
+      SmallPtrSet<Instruction *, 8> Visited;
+      while (!Worklist.empty()) {
+        Instruction *I = Worklist.pop_back_val();
+        if (!Visited.insert(I)) continue;
+
+        ValueExprMapType::iterator It =
+          ValueExprMap.find(static_cast<Value *>(I));
+        if (It != ValueExprMap.end()) {
+          // SCEVUnknown for a PHI either means that it has an unrecognized
+          // structure, or it's a PHI that's in the process of being computed
+          // by createNodeForPHI. In the former case, additional loop trip
+          // count information isn't going to change anything. In the latter
+          // case, createNodeForPHI will perform the necessary updates on its
+          // own when it gets to that point.
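+          // (For instance, a header PHI merging two unrelated values stays
+          // SCEVUnknown no matter how precise the trip count is, so there
+          // is nothing useful to invalidate for it.)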
+          if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+            ValuesAtScopes.erase(It->second);
+            ValueExprMap.erase(It);
+          }
+          if (PHINode *PN = dyn_cast<PHINode>(I))
+            ConstantEvolutionLoopExitValue.erase(PN);
+        }
+
+        PushDefUseChildren(I, Worklist);
+      }
+    }
+  }
+  return Pair.first->second;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+  // Drop any stored trip count value.
+  BackedgeTakenCounts.erase(L);
+
+  // Drop information about expressions based on loop-header PHIs.
+  SmallVector<Instruction *, 16> Worklist;
+  PushLoopPHIs(L, Worklist);
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (!Visited.insert(I)) continue;
+
+    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    if (It != ValueExprMap.end()) {
+      ValuesAtScopes.erase(It->second);
+      ValueExprMap.erase(It);
+      if (PHINode *PN = dyn_cast<PHINode>(I))
+        ConstantEvolutionLoopExitValue.erase(PN);
+    }
+
+    PushDefUseChildren(I, Worklist);
+  }
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return;
+
+  // Drop information about expressions based on loop-header PHIs.
+  SmallVector<Instruction *, 16> Worklist;
+  Worklist.push_back(I);
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  while (!Worklist.empty()) {
+    I = Worklist.pop_back_val();
+    if (!Visited.insert(I)) continue;
+
+    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    if (It != ValueExprMap.end()) {
+      ValuesAtScopes.erase(It->second);
+      ValueExprMap.erase(It);
+      if (PHINode *PN = dyn_cast<PHINode>(I))
+        ConstantEvolutionLoopExitValue.erase(PN);
+    }
+
+    PushDefUseChildren(I, Worklist);
+  }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Examine all exits and pick the most conservative values.
+  const SCEV *BECount = getCouldNotCompute();
+  const SCEV *MaxBECount = getCouldNotCompute();
+  bool CouldNotComputeBECount = false;
+  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+    BackedgeTakenInfo NewBTI =
+      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+    if (NewBTI.Exact == getCouldNotCompute()) {
+      // We couldn't compute an exact value for this exit, so
+      // we won't be able to compute an exact value for the loop.
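+      // Any remaining exits may still contribute to Max below, since the
+      // loop cannot run longer than its earliest computable exit allows.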
+ CouldNotComputeBECount = true; + BECount = getCouldNotCompute(); + } else if (!CouldNotComputeBECount) { + if (BECount == getCouldNotCompute()) + BECount = NewBTI.Exact; + else + BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); + } + if (MaxBECount == getCouldNotCompute()) + MaxBECount = NewBTI.Max; + else if (NewBTI.Max != getCouldNotCompute()) + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); +} + +/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge +/// of the specified loop will execute if it exits via the specified block. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, + BasicBlock *ExitingBlock) { + + // Okay, we've chosen an exiting block. See what condition causes us to + // exit at this block. + // + // FIXME: we should be able to handle switch instructions (with a single exit) + BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (ExitBr == 0) return getCouldNotCompute(); + assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); + + // At this point, we know we have a conditional branch that determines whether + // the loop is exited. However, we don't know if the branch is executed each + // time through the loop. If not, then the execution count of the branch will + // not be equal to the trip count of the loop. + // + // Currently we check for this by checking to see if the Exit branch goes to + // the loop header. If so, we know it will always execute the same number of + // times as the loop. We also handle the case where the exit block *is* the + // loop header. This is common for un-rotated loops. + // + // If both of those tests fail, walk up the unique predecessor chain to the + // header, stopping if there is an edge that doesn't exit the loop. If the + // header is reached, the execution count of the branch will be equal to the + // trip count of the loop. + // + // More extensive analysis could be done to handle more cases here. + // + if (ExitBr->getSuccessor(0) != L->getHeader() && + ExitBr->getSuccessor(1) != L->getHeader() && + ExitBr->getParent() != L->getHeader()) { + // The simple checks failed, try climbing the unique predecessor chain + // up to the header. + bool Ok = false; + for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred) + return getCouldNotCompute(); + TerminatorInst *PredTerm = Pred->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + BasicBlock *PredSucc = PredTerm->getSuccessor(i); + if (PredSucc == BB) + continue; + // If the predecessor has a successor that isn't BB and isn't + // outside the loop, assume the worst. + if (L->contains(PredSucc)) + return getCouldNotCompute(); + } + if (Pred == L->getHeader()) { + Ok = true; + break; + } + BB = Pred; + } + if (!Ok) + return getCouldNotCompute(); + } + + // Proceed to the next level to examine the exit condition expression. + return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); +} + +/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of ExitCond, TBB, and FBB. 
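+///
+/// For example, when the loop continues while (A && B), both subconditions
+/// must remain true for the backedge to be taken, so when both counts are
+/// computable the result is umin(count(A), count(B)).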
+ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + // Check if the controlling expression for this loop is an And or Or. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { + if (BO->getOpcode() == Instruction::And) { + // Recurse on the operands of the and. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (L->contains(TBB)) { + // Both conditions must be true for the loop to continue executing. + // Choose the less conservative count. + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max == getCouldNotCompute()) + MaxBECount = BTI1.Max; + else if (BTI1.Max == getCouldNotCompute()) + MaxBECount = BTI0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + } else { + // Both conditions must be true at the same time for the loop to exit. + // For now, be conservative. + assert(L->contains(FBB) && "Loop block has no successor in loop!"); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + if (BO->getOpcode() == Instruction::Or) { + // Recurse on the operands of the or. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (L->contains(FBB)) { + // Both conditions must be false for the loop to continue executing. + // Choose the less conservative count. + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max == getCouldNotCompute()) + MaxBECount = BTI1.Max; + else if (BTI1.Max == getCouldNotCompute()) + MaxBECount = BTI0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + } else { + // Both conditions must be false at the same time for the loop to exit. + // For now, be conservative. + assert(L->contains(TBB) && "Loop block has no successor in loop!"); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + } + + // With an icmp, it may be feasible to compute an exact backedge-taken count. + // Proceed to the next level to examine the icmp. + if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) + return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + + // Check for a constant condition. These are normally stripped out by + // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to + // preserve the CFG and is temporarily leaving constant conditions + // in place. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { + if (L->contains(FBB) == !CI->getZExtValue()) + // The backedge is always taken. + return getCouldNotCompute(); + else + // The backedge is never taken. 
+ return getConstant(CI->getType(), 0); + } + + // If it's not an integer or pointer comparison then compute it the hard way. + return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + + // If the condition was exit on true, convert the condition to exit on false + ICmpInst::Predicate Cond; + if (!L->contains(FBB)) + Cond = ExitCond->getPredicate(); + else + Cond = ExitCond->getInversePredicate(); + + // Handle common loops like: for (X = "string"; *X; ++X) + if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) + if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { + BackedgeTakenInfo ItCnt = + ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); + if (ItCnt.hasAnyInfo()) + return ItCnt; + } + + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); + + // Try to evaluate any dependencies out of the loop. + LHS = getSCEVAtScope(LHS, L); + RHS = getSCEVAtScope(RHS, L); + + // At this point, we would like to compute how many iterations of the + // loop the predicate will return true for these inputs. + if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) { + // If there is a loop-invariant, force it into the RHS. + std::swap(LHS, RHS); + Cond = ICmpInst::getSwappedPredicate(Cond); + } + + // Simplify the operands before analyzing them. + (void)SimplifyICmpOperands(Cond, LHS, RHS); + + // If we have a comparison of a chrec against a constant, try to use value + // ranges to answer this query. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) + if (AddRec->getLoop() == L) { + // Form the constant range. 
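+        // For example, Cond = ICMP_ULT with a RHS of 100 yields the range
+        // [0,100), the set of values for which the comparison holds.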
+ ConstantRange CompRange( + ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + + const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); + if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; + } + + switch (Cond) { + case ICmpInst::ICMP_NE: { // while (X != Y) + // Convert to: while (X-Y != 0) + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_EQ: { // while (X == Y) + // Convert to: while (X-Y == 0) + BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_SLT: { + BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_SGT: { + BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + getNotSCEV(RHS), L, true); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_ULT: { + BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_UGT: { + BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + getNotSCEV(RHS), L, false); + if (BTI.hasAnyInfo()) return BTI; + break; + } + default: +#if 0 + dbgs() << "ComputeBackedgeTakenCount "; + if (ExitCond->getOperand(0)->getType()->isUnsigned()) + dbgs() << "[unsigned] "; + dbgs() << *LHS << " " + << Instruction::getOpcodeName(Instruction::ICmp) + << " " << *RHS << "\n"; +#endif + break; + } + return + ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +static ConstantInt * +EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, + ScalarEvolution &SE) { + const SCEV *InVal = SE.getConstant(C); + const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); + assert(isa<SCEVConstant>(Val) && + "Evaluation of SCEV at constant didn't fold correctly?"); + return cast<SCEVConstant>(Val)->getValue(); +} + +/// GetAddressedElementFromGlobal - Given a global variable with an initializer +/// and a GEP expression (missing the pointer index) indexing into it, return +/// the addressed element of the initializer or null if the index expression is +/// invalid. 
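+///
+/// For example, with a constant [10 x i32] array initializer and the index
+/// list {3}, this returns the constant at element 3; an index past the end
+/// of the array (a bogus program) yields null.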
+static Constant * +GetAddressedElementFromGlobal(GlobalVariable *GV, + const std::vector<ConstantInt*> &Indices) { + Constant *Init = GV->getInitializer(); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + uint64_t Idx = Indices[i]->getZExtValue(); + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { + assert(Idx < CS->getNumOperands() && "Bad struct index!"); + Init = cast<Constant>(CS->getOperand(Idx)); + } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + if (Idx >= CA->getNumOperands()) return 0; // Bogus program + Init = cast<Constant>(CA->getOperand(Idx)); + } else if (isa<ConstantAggregateZero>(Init)) { + if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + assert(Idx < STy->getNumElements() && "Bad struct index!"); + Init = Constant::getNullValue(STy->getElementType(Idx)); + } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { + if (Idx >= ATy->getNumElements()) return 0; // Bogus program + Init = Constant::getNullValue(ATy->getElementType()); + } else { + llvm_unreachable("Unknown constant aggregate type!"); + } + return 0; + } else { + return 0; // Unknown initializer type + } + } + return Init; +} + +/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of +/// 'icmp op load X, cst', try to see if we can compute the backedge +/// execution count. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( + LoadInst *LI, + Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + if (LI->isVolatile()) return getCouldNotCompute(); + + // Check to see if the loaded pointer is a getelementptr of a global. + // TODO: Use SCEV instead of manually grubbing with GEPs. + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); + if (!GEP) return getCouldNotCompute(); + + // Make sure that it is really a constant global we are gepping, with an + // initializer, and make sure the first IDX is really 0. + GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || + !cast<Constant>(GEP->getOperand(1))->isNullValue()) + return getCouldNotCompute(); + + // Okay, we allow one non-constant index into the GEP instruction. + Value *VarIdx = 0; + std::vector<ConstantInt*> Indexes; + unsigned VarIdxNum = 0; + for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { + Indexes.push_back(CI); + } else if (!isa<ConstantInt>(GEP->getOperand(i))) { + if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. + VarIdx = GEP->getOperand(i); + VarIdxNum = i-2; + Indexes.push_back(0); + } + + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. + // Check to see if X is a loop variant variable value now. + const SCEV *Idx = getSCEV(VarIdx); + Idx = getSCEVAtScope(Idx, L); + + // We can only recognize very limited forms of loop index expressions, in + // particular, only affine AddRec's like {C1,+,C2}. 
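+  // For example, {0,+,1} describes an index that starts at 0 and advances
+  // by 1 each iteration; a quadratic recurrence like {0,+,1,+,1} is
+  // rejected by the checks below.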
+  const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+  if (!IdxExpr || !IdxExpr->isAffine() || IdxExpr->isLoopInvariant(L) ||
+      !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+      !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+    return getCouldNotCompute();
+
+  unsigned MaxSteps = MaxBruteForceIterations;
+  for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+    ConstantInt *ItCst = ConstantInt::get(
+                           cast<IntegerType>(IdxExpr->getType()), IterationNum);
+    ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+    // Form the GEP offset.
+    Indexes[VarIdxNum] = Val;
+
+    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+    if (Result == 0) break;  // Cannot compute!
+
+    // Evaluate the condition for this iteration.
+    Result = ConstantExpr::getICmp(predicate, Result, RHS);
+    if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
+    if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+      dbgs() << "\n***\n*** Computed loop count " << *ItCst
+             << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+             << "***\n";
+#endif
+      ++NumArrayLenItCounts;
+      return getConstant(ItCst);   // Found terminating iteration!
+    }
+  }
+  return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+    return true;
+
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (const Function *F = CI->getCalledFunction())
+      return canConstantFoldCallTo(F);
+  return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from.  We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI.  If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+  // If this is not an instruction, or if this is an instruction outside of the
+  // loop, it can't be derived from a loop PHI.
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !L->contains(I)) return 0;
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    if (L->getHeader() == I->getParent())
+      return PN;
+    else
+      // We don't currently keep track of the control flow needed to evaluate
+      // PHIs, so we cannot handle PHIs inside of loops.
+      return 0;
+  }
+
+  // If we won't be able to constant fold this expression even if the operands
+  // are constants, return early.
+  if (!CanConstantFold(I)) return 0;
+
+  // Otherwise, we can evaluate this instruction if all of its operands are
+  // constant or derived from a PHI node themselves.
+  PHINode *PHI = 0;
+  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+    if (!isa<Constant>(I->getOperand(Op))) {
+      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+      if (P == 0) return 0;  // Not evolving from PHI
+      if (PHI == 0)
+        PHI = P;
+      else if (PHI != P)
+        return 0;  // Evolving from multiple different PHIs.
+    }
+
+  // This is an expression evolving from a constant PHI!
+  return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal.  If we can't fold this expression for some
+/// reason, return null.
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+                                    const TargetData *TD) {
+  if (isa<PHINode>(V)) return PHIVal;
+  if (Constant *C = dyn_cast<Constant>(V)) return C;
+  Instruction *I = cast<Instruction>(V);
+
+  std::vector<Constant*> Operands(I->getNumOperands());
+
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
+    if (Operands[i] == 0) return 0;
+  }
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+                                           Operands[1], TD);
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                  &Operands[0], Operands.size(), TD);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+                                                   const APInt &BEs,
+                                                   const Loop *L) {
+  std::map<PHINode*, Constant*>::const_iterator I =
+    ConstantEvolutionLoopExitValue.find(PN);
+  if (I != ConstantEvolutionLoopExitValue.end())
+    return I->second;
+
+  if (BEs.ugt(MaxBruteForceIterations))
+    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+
+  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+  // Since the loop is canonicalized, the PHI node must have two entries.  One
+  // entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
+  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+  Constant *StartCST =
+    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+  if (StartCST == 0)
+    return RetVal = 0;  // Must be a constant.
+
+  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+  if (getConstantEvolvingPHI(BEValue, L) != PN &&
+      !isa<Constant>(BEValue))
+    return RetVal = 0;  // Not derived from same PHI.
+
+  // Execute the loop symbolically to determine the exit value.
+  if (BEs.getActiveBits() >= 32)
+    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+  unsigned NumIterations = BEs.getZExtValue(); // must be in range
+  unsigned IterationNum = 0;
+  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+    if (IterationNum == NumIterations)
+      return RetVal = PHIVal;  // Got exit value!
+
+    // Compute the value of the PHI node for the next iteration.
+    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    if (NextPHI == PHIVal)
+      return RetVal = NextPHI;  // Stopped evolving!
+    if (NextPHI == 0)
+      return 0;        // Couldn't evaluate!
+    PHIVal = NextPHI;
+  }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit condition
+/// gets a value of ExitWhen (true or false).  If we cannot evaluate the
+/// trip count of the loop, return getCouldNotCompute().
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                       Value *Cond,
+                                                       bool ExitWhen) {
+  PHINode *PN = getConstantEvolvingPHI(Cond, L);
+  if (PN == 0) return getCouldNotCompute();
+
+  // If the loop is canonicalized, the PHI will have exactly two entries.
+  // That's the only form we support here.
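+  // In canonical form the header PHI looks like
+  //   %iv = phi i32 [ %start, %preheader ], [ %next, %latch ]
+  // with %start coming from outside the loop and %next computed inside it.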
+  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+  // One entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
+  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+  Constant *StartCST =
+    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+  if (StartCST == 0) return getCouldNotCompute();  // Must be a constant.
+
+  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+  if (getConstantEvolvingPHI(BEValue, L) != PN &&
+      !isa<Constant>(BEValue))
+    return getCouldNotCompute();  // Not derived from same PHI.
+
+  // Okay, we found a PHI node that defines the trip count of this loop.
+  // Execute the loop symbolically to determine when the condition gets a
+  // value of "ExitWhen".
+  unsigned IterationNum = 0;
+  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
+  for (Constant *PHIVal = StartCST;
+       IterationNum != MaxIterations; ++IterationNum) {
+    ConstantInt *CondVal =
+      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+
+    // Couldn't symbolically evaluate.
+    if (!CondVal) return getCouldNotCompute();
+
+    if (CondVal->getValue() == uint64_t(ExitWhen)) {
+      ++NumBruteForceTripCountsComputed;
+      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+    }
+
+    // Compute the value of the PHI node for the next iteration.
+    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    if (NextPHI == 0 || NextPHI == PHIVal)
+      return getCouldNotCompute();// Couldn't evaluate or not making progress...
+    PHIVal = NextPHI;
+  }
+
+  // Too many iterations were needed to evaluate.
+  return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program.  The L value specifies the loop
+/// nest to evaluate the expression in: null means the top-level scope, and
+/// a non-null loop means the point immediately inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+  // Check to see if we've folded this expression at this loop before.
+  std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+  std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+    Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+  if (!Pair.second)
+    return Pair.first->second ? Pair.first->second : V;
+
+  // Otherwise compute it.
+  const SCEV *C = computeSCEVAtScope(V, L);
+  ValuesAtScopes[V][L] = C;
+  return C;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+  if (isa<SCEVConstant>(V)) return V;
+
+  // If this instruction is evolved from a constant-evolving PHI, compute the
+  // exit value from the loop without using SCEVs.
+  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+      const Loop *LI = (*this->LI)[I->getParent()];
+      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
+        if (PHINode *PN = dyn_cast<PHINode>(I))
+          if (PN->getParent() == LI->getHeader()) {
+            // Okay, there is no closed form solution for the PHI node.  Check
+            // to see if the loop that contains it has a known backedge-taken
+            // count.  If so, we may be able to force computation of the exit
+            // value.
+            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+            if (const SCEVConstant *BTCC =
+                  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+              // Okay, we know how many times the containing loop executes.  If
+              // this is a constant evolving PHI node, get the final value at
+              // the specified iteration number.
+              Constant *RV = getConstantEvolutionLoopExitValue(PN,
+                                                   BTCC->getValue()->getValue(),
+                                                               LI);
+              if (RV) return getSCEV(RV);
+            }
+          }
+
+      // Okay, this is an expression that we cannot symbolically evaluate
+      // into a SCEV.  Check to see if it's possible to symbolically evaluate
+      // the arguments into constants, and if so, try to constant propagate the
+      // result.  This is particularly useful for computing loop exit values.
+      if (CanConstantFold(I)) {
+        SmallVector<Constant *, 4> Operands;
+        bool MadeImprovement = false;
+        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+          Value *Op = I->getOperand(i);
+          if (Constant *C = dyn_cast<Constant>(Op)) {
+            Operands.push_back(C);
+            continue;
+          }
+
+          // If any operand is non-constant and not integer- or pointer-typed,
+          // don't even try to analyze it with SCEV techniques.
+          if (!isSCEVable(Op->getType()))
+            return V;
+
+          const SCEV *OrigV = getSCEV(Op);
+          const SCEV *OpV = getSCEVAtScope(OrigV, L);
+          MadeImprovement |= OrigV != OpV;
+
+          Constant *C = 0;
+          if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
+            C = SC->getValue();
+          if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
+            C = dyn_cast<Constant>(SU->getValue());
+          if (!C) return V;
+          if (C->getType() != Op->getType())
+            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+                                                              Op->getType(),
+                                                              false),
+                                      C, Op->getType());
+          Operands.push_back(C);
+        }
+
+        // Check to see if getSCEVAtScope actually made an improvement.
+        if (MadeImprovement) {
+          Constant *C = 0;
+          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+            C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+                                                Operands[0], Operands[1], TD);
+          else
+            C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                         &Operands[0], Operands.size(), TD);
+          if (!C) return V;
+          return getSCEV(C);
+        }
+      }
+    }
+
+    // This is some other type of SCEVUnknown, just return it.
+    return V;
+  }
+
+  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
+    // Avoid performing the look-up in the common case where the specified
+    // expression has no loop-variant portions.
+    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
+      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+      if (OpAtScope != Comm->getOperand(i)) {
+        // Okay, at least one of these operands is loop variant but might be
+        // foldable.  Build a new instance of the folded commutative expression.
+        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+                                            Comm->op_begin()+i);
+        NewOps.push_back(OpAtScope);
+
+        for (++i; i != e; ++i) {
+          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
+          NewOps.push_back(OpAtScope);
+        }
+        if (isa<SCEVAddExpr>(Comm))
+          return getAddExpr(NewOps);
+        if (isa<SCEVMulExpr>(Comm))
+          return getMulExpr(NewOps);
+        if (isa<SCEVSMaxExpr>(Comm))
+          return getSMaxExpr(NewOps);
+        if (isa<SCEVUMaxExpr>(Comm))
+          return getUMaxExpr(NewOps);
+        llvm_unreachable("Unknown commutative SCEV type!");
+      }
+    }
+    // If we got here, all operands are loop invariant.
+ return Comm; + } + + if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); + if (LHS == Div->getLHS() && RHS == Div->getRHS()) + return Div; // must be loop invariant + return getUDivExpr(LHS, RHS); + } + + // If this is a loop recurrence for a loop that does not contain L, then we + // are dealing with the final value computed by the loop. + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) { + // First, attempt to evaluate each operand. + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); + if (OpAtScope == AddRec->getOperand(i)) + continue; + + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(), + AddRec->op_begin()+i); + NewOps.push_back(OpAtScope); + for (++i; i != e; ++i) + NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); + + AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop())); + break; + } + + // If the scope is outside the addrec's loop, evaluate it by using the + // loop exit value of the addrec. + if (!AddRec->getLoop()->contains(L)) { + // To evaluate this recurrence, we need to know how many times the AddRec + // loop iterates. Compute this now. + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; + + // Then, evaluate the AddRec. + return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); + } + + return AddRec; + } + + if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getZeroExtendExpr(Op, Cast->getType()); + } + + if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getSignExtendExpr(Op, Cast->getType()); + } + + if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getTruncateExpr(Op, Cast->getType()); + } + + llvm_unreachable("Unknown SCEV type!"); + return 0; +} + +/// getSCEVAtScope - This is a convenience function which does +/// getSCEVAtScope(getSCEV(V), L). +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { + return getSCEVAtScope(getSCEV(V), L); +} + +/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the +/// following equation: +/// +/// A * X = B (mod N) +/// +/// where N = 2^BW and BW is the common bit width of A and B. The signedness of +/// A and B isn't important. +/// +/// If the equation does not have a solution, SCEVCouldNotCompute is returned. +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, + ScalarEvolution &SE) { + uint32_t BW = A.getBitWidth(); + assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(A != 0 && "A must be non-zero."); + + // 1. 
D = gcd(A, N) + // + // The gcd of A and N may have only one prime factor: 2. The number of + // trailing zeros in A is its multiplicity + uint32_t Mult2 = A.countTrailingZeros(); + // D = 2^Mult2 + + // 2. Check if B is divisible by D. + // + // B is divisible by D if and only if the multiplicity of prime factor 2 for B + // is not less than multiplicity of this prime factor for D. + if (B.countTrailingZeros() < Mult2) + return SE.getCouldNotCompute(); + + // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic + // modulo (N / D). + // + // (N / D) may need BW+1 bits in its representation. Hence, we'll use this + // bit width during computations. + APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D + APInt Mod(BW + 1, 0); + Mod.set(BW - Mult2); // Mod = N / D + APInt I = AD.multiplicativeInverse(Mod); + + // 4. Compute the minimum unsigned root of the equation: + // I * (B / D) mod (N / D) + APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + + // The result is guaranteed to be less than 2^BW so we may truncate it to BW + // bits. + return SE.getConstant(Result.trunc(BW)); +} + +/// SolveQuadraticEquation - Find the roots of the quadratic equation for the +/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which +/// might be the same) or two SCEVCouldNotCompute objects. +/// +static std::pair<const SCEV *,const SCEV *> +SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); + const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); + const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1)); + const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2)); + + // We currently can only solve this if the coefficients are constants. + if (!LC || !MC || !NC) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); + const APInt &L = LC->getValue()->getValue(); + const APInt &M = MC->getValue()->getValue(); + const APInt &N = NC->getValue()->getValue(); + APInt Two(BitWidth, 2); + APInt Four(BitWidth, 4); + + { + using namespace APIntOps; + const APInt& C = L; + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The B coefficient is M-N/2 + APInt B(M); + B -= sdiv(N,Two); + + // The A coefficient is N/2 + APInt A(N.sdiv(Two)); + + // Compute the B^2-4ac term. + APInt SqrtTerm(B); + SqrtTerm *= B; + SqrtTerm -= Four * (A * C); + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal(SqrtTerm.sqrt()); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB(-B); + APInt TwoA( A << 1 ); + if (TwoA.isMinValue()) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + + return std::make_pair(SE.getConstant(Solution1), + SE.getConstant(Solution2)); + } // end APIntOps namespace +} + +/// HowFarToZero - Return the number of times a backedge comparing the specified +/// value to zero will execute. If not computable, return CouldNotCompute. 
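+///
+/// For example, the affine recurrence {4,+,-1} reaches zero after exactly
+/// four backedge traversals: solving -1*N = -4 (mod 2^BW) gives N = 4.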
+ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { + // If the value is a constant + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { + // If the value is already zero, the branch will execute zero times. + if (C->getValue()->isZero()) return C; + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + if (AddRec->isAffine()) { + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + const SCEV *Start = getSCEVAtScope(AddRec->getStart(), + L->getParentLoop()); + const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), + L->getParentLoop()); + + if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) { + // For now we handle only constant steps. + + // First, handle unitary steps. + if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: + return getNegativeSCEV(Start); // N = -Start (as unsigned) + if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: + return Start; // N = Start (as unsigned) + + // Then, try to solve the above equation provided that Start is constant. + if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); + } + } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. + std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec, + *this); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + if (R1) { +#if 0 + dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 + << " sol#2: " << *R2 << "\n"; +#endif + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); + if (Val->isZero()) + return R1; // We found a quadratic root! + } + } + } + + return getCouldNotCompute(); +} + +/// HowFarToNonZero - Return the number of times a backedge checking the +/// specified value for nonzero will execute. If not computable, return +/// CouldNotCompute +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { + // Loops that look like: while (X == 0) are very strange indeed. We don't + // handle them yet except for the trivial case. This could be expanded in the + // future as needed. + + // If the value is a constant, check to see if it is known to be non-zero + // already. If so, the backedge will execute zero times. 
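+  // For example, while (c == 0) with c a non-zero constant never takes the
+  // backedge, so the count is zero; if c is the constant zero, the loop
+  // never exits.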
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
+    if (!C->getValue()->isNullValue())
+      return getConstant(C->getType(), 0);
+    return getCouldNotCompute();  // Otherwise it will loop infinitely.
+  }
+
+  // We could implement others, but I really doubt anyone writes loops like
+  // this, and if they did, they would already be constant folded.
+  return getCouldNotCompute();
+}
+
+/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+/// (which may not be an immediate predecessor) which has exactly one
+/// successor from which BB is reachable, paired with that successor; the
+/// predecessor is null if no such block is found.
+///
+std::pair<BasicBlock *, BasicBlock *>
+ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
+  // If the block has a unique predecessor, then there is no path from the
+  // predecessor to the block that does not go through the direct edge
+  // from the predecessor to the block.
+  if (BasicBlock *Pred = BB->getSinglePredecessor())
+    return std::make_pair(Pred, BB);
+
+  // A loop's header is defined to be a block that dominates the loop.
+  // If the header has a unique predecessor outside the loop, it must be
+  // a block that has exactly one successor that can reach the loop.
+  if (Loop *L = LI->getLoopFor(BB))
+    return std::make_pair(L->getLoopPredecessor(), L->getHeader());
+
+  return std::pair<BasicBlock *, BasicBlock *>();
+}
+
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal; however, for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEV *A, const SCEV *B) {
+  // Quick check to see if they are the same SCEV.
+  if (A == B) return true;
+
+  // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+  // two different instructions with the same value.  Check for this case.
+  if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+    if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+      if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+        if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+          if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory())
+            return true;
+
+  // Otherwise assume they may have a different value.
+  return false;
+}
+
+/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+/// predicate Pred. Return true iff any changes were made.
+///
+bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+                                           const SCEV *&LHS, const SCEV *&RHS) {
+  bool Changed = false;
+
+  // Canonicalize a constant to the right side.
+  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
+    // Check for both operands constant.
+    if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) {
+      if (ConstantExpr::getICmp(Pred,
+                                LHSC->getValue(),
+                                RHSC->getValue())->isNullValue())
+        goto trivially_false;
+      else
+        goto trivially_true;
+    }
+    // Otherwise swap the operands to put the constant on the right.
+    std::swap(LHS, RHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+    Changed = true;
+  }
+
+  // If we're comparing an addrec with a value which is loop-invariant in the
+  // addrec's loop, put the addrec on the left.  Also make a dominance check,
+  // as both operands could be addrecs loop-invariant in each other's loop.
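+  // For example, 'n > {0,+,1}<L>' with n invariant in L becomes
+  // '{0,+,1}<L> < n', so later logic only needs to look for addrecs on
+  // the left-hand side.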
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { + const Loop *L = AR->getLoop(); + if (LHS->isLoopInvariant(L) && LHS->properlyDominates(L->getHeader(), DT)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. + if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. 
+ if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. + switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(Type::getInt1Ty(getContext()), 0); + Pred = ICmpInst::ICMP_NE; + return true; +} + +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
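+  // For example, i = {1,+,1}<L> satisfies 'i <= n' on every iteration when
+  // the loop entry guarantees 1 <= n and the backedge condition guarantees
+  // i+1 <= n whenever the backedge is taken.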
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+    if (isLoopEntryGuardedByCond(
+          AR->getLoop(), Pred, AR->getStart(), RHS) &&
+        isLoopBackedgeGuardedByCond(
+          AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+      return true;
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+    if (isLoopEntryGuardedByCond(
+          AR->getLoop(), Pred, LHS, AR->getStart()) &&
+        isLoopBackedgeGuardedByCond(
+          AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+      return true;
+
+  // Otherwise see what can be done with known constant ranges.
+  return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+                                            const SCEV *LHS, const SCEV *RHS) {
+  if (HasSameValue(LHS, RHS))
+    return ICmpInst::isTrueWhenEqual(Pred);
+
+  // This code is split out from isKnownPredicate because it is called from
+  // within isLoopEntryGuardedByCond.
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+    break;
+  case ICmpInst::ICMP_SGT:
+    Pred = ICmpInst::ICMP_SLT;
+    std::swap(LHS, RHS);
+    // fall through
+  case ICmpInst::ICMP_SLT: {
+    ConstantRange LHSRange = getSignedRange(LHS);
+    ConstantRange RHSRange = getSignedRange(RHS);
+    if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+      return true;
+    if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE:
+    Pred = ICmpInst::ICMP_SLE;
+    std::swap(LHS, RHS);
+    // fall through
+  case ICmpInst::ICMP_SLE: {
+    ConstantRange LHSRange = getSignedRange(LHS);
+    ConstantRange RHSRange = getSignedRange(RHS);
+    if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+      return true;
+    if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_UGT:
+    Pred = ICmpInst::ICMP_ULT;
+    std::swap(LHS, RHS);
+    // fall through
+  case ICmpInst::ICMP_ULT: {
+    ConstantRange LHSRange = getUnsignedRange(LHS);
+    ConstantRange RHSRange = getUnsignedRange(RHS);
+    if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+      return true;
+    if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_UGE:
+    Pred = ICmpInst::ICMP_ULE;
+    std::swap(LHS, RHS);
+    // fall through
+  case ICmpInst::ICMP_ULE: {
+    ConstantRange LHSRange = getUnsignedRange(LHS);
+    ConstantRange RHSRange = getUnsignedRange(RHS);
+    if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+      return true;
+    if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_NE: {
+    if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+      return true;
+    if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+      return true;
+
+    const SCEV *Diff = getMinusSCEV(LHS, RHS);
+    if (isKnownNonZero(Diff))
+      return true;
+    break;
+  }
+  case ICmpInst::ICMP_EQ:
+    // The check at the top of the function catches the case where
+    // the values are known to be equal.
+    break;
+  }
+  return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS.  This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+                                             ICmpInst::Predicate Pred,
+                                             const SCEV *LHS, const SCEV *RHS) {
+  // Interpret a null as meaning no loop, where there is obviously no guard
+  // (interprocedural conditions notwithstanding).
+ if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + return isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return false; + + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + for (std::pair<BasicBlock *, BasicBlock *> + Pair(L->getLoopPredecessor(), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); + if (!LoopEntryPredicate || + LoopEntryPredicate->isUnconditional()) + continue; + + if (isImpliedCond(Pred, LHS, RHS, + LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) + return true; + } + + return false; +} + +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, + bool Inverse) { + // Recursively handle And and Or conditions. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { + if (BO->getOpcode() == Instruction::And) { + if (!Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } else if (BO->getOpcode() == Instruction::Or) { + if (Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } + } + + ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); + if (!ICI) return false; + + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them. This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + + // Now that we found a conditional branch that dominates the loop, check to + // see if it is the comparison we are looking for. + ICmpInst::Predicate FoundPred; + if (Inverse) + FoundPred = ICI->getInversePredicate(); + else + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. 
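+ // For example (hypothetical types), if LHS is an i64 expression but the
+ // found comparison was between i32 operands, FoundLHS and FoundRHS are
+ // widened below: sext for signed predicates, zext for unsigned ones.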
+ if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } + + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(Pred); + + // Check to see if we can make the LHS or RHS match. + if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa<SCEVConstant>(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + } + + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa<SCEVConstant>(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. + return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true. +bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true. 
+bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } + + return false; +} + +/// getBECount - Subtract the end and start values and divide by the step, +/// rounding up, to get the number of times the backedge is executed. Return +/// CouldNotCompute if an intermediate computation overflows. +const SCEV *ScalarEvolution::getBECount(const SCEV *Start, + const SCEV *End, + const SCEV *Step, + bool NoWrap) { + assert(!isKnownNegative(Step) && + "This code doesn't handle negative strides yet!"); + + const Type *Ty = Start->getType(); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); + const SCEV *Diff = getMinusSCEV(End, Start); + const SCEV *RoundUp = getAddExpr(Step, NegOne); + + // Add an adjustment to the difference between End and Start so that + // the division will effectively round up. + const SCEV *Add = getAddExpr(Diff, RoundUp); + + if (!NoWrap) { + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. + const Type *WideTy = IntegerType::get(getContext(), + getTypeSizeInBits(Ty) + 1); + const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); + const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); + if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) + return getCouldNotCompute(); + } + + return getUDivExpr(Add, Step); +} + +/// HowManyLessThans - Return the number of times a backedge containing the +/// specified less-than comparison will execute. If not computable, return +/// CouldNotCompute. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool isSigned) { + // Only handle: "ADDREC < LoopInvariant". + if (!RHS->isLoopInvariant(L)) return getCouldNotCompute(); + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // Check to see if we have a flag which makes analysis easy. + bool NoWrap = isSigned ? 
AddRec->hasNoSignedWrap() : + AddRec->hasNoUnsignedWrap(); + + if (AddRec->isAffine()) { + unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + if (Step->isZero()) + return getCouldNotCompute(); + if (Step->isOne()) { + // With unit stride, the iteration never steps past the limit value. + } else if (isKnownPositive(Step)) { + // Test whether a positive iteration can step past the limit + // value and past the maximum value for its type in a single step. + // Note that it's not sufficient to check NoWrap here, because even + // though the value after a wrap is undefined, it's not undefined + // behavior, so if wrap does occur, the loop could either terminate or + // loop infinitely, but in either case, the loop is guaranteed to + // iterate at least until the iteration where the wrapping occurs. + const SCEV *One = getConstant(Step->getType(), 1); + if (isSigned) { + APInt Max = APInt::getSignedMaxValue(BitWidth); + if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) + .slt(getSignedRange(RHS).getSignedMax())) + return getCouldNotCompute(); + } else { + APInt Max = APInt::getMaxValue(BitWidth); + if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) + .ult(getUnsignedRange(RHS).getUnsignedMax())) + return getCouldNotCompute(); + } + } else + // TODO: Handle negative strides here and below. + return getCouldNotCompute(); + + // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant + // m. So, we count the number of iterations in which {n,+,s} < m is true. + // Note that we cannot simply return max(m-n,0)/s because it's not safe to + // treat m-n as signed nor unsigned due to overflow possibility. + + // First, we get the value of the LHS in the first iteration: n + const SCEV *Start = AddRec->getOperand(0); + + // Determine the minimum constant start value. + const SCEV *MinStart = getConstant(isSigned ? + getSignedRange(Start).getSignedMin() : + getUnsignedRange(Start).getUnsignedMin()); + + // If we know that the condition is true in order to enter the loop, + // then we know that it will run exactly (m-n)/s times. Otherwise, we + // only know that it will execute (max(m,n)-n)/s times. In both cases, + // the division must round up. + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, + getMinusSCEV(Start, Step), RHS)) + End = isSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + // Determine the maximum constant end value. + const SCEV *MaxEnd = getConstant(isSigned ? + getSignedRange(End).getSignedMax() : + getUnsignedRange(End).getUnsignedMax()); + + // If MaxEnd is within a step of the maximum integer value in its type, + // adjust it down to the minimum value which would produce the same effect. + // This allows the subsequent ceiling division of (N+(step-1))/step to + // compute the correct value. + const SCEV *StepMinusOne = getMinusSCEV(Step, + getConstant(Step->getType(), 1)); + MaxEnd = isSigned ? + getSMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), + StepMinusOne)) : + getUMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), + StepMinusOne)); + + // Finally, we subtract these two values and divide, rounding up, to get + // the number of times the backedge is executed. 
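+ // Worked example with made-up values: Start = 0, End = 10, Step = 3
+ // gives ((10 - 0) + (3 - 1)) /u 3 = 4, the number of iterations for
+ // which {0,+,3} < 10 holds (the IV values 0, 3, 6, and 9).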
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
+
+ // The maximum backedge count is similar, except using the minimum start
+ // value and the maximum end value.
+ const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap);
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+
+ return getCouldNotCompute();
+}
+
+/// getNumIterationsInRange - Return the number of iterations of this loop that
+/// produce values in the specified constant range. Another way of looking at
+/// this is that it returns the first iteration number where the value is not in
+/// the range, thus computing the exit count. If the iteration count can't
+/// be computed, an instance of SCEVCouldNotCompute is returned.
+const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
+ ScalarEvolution &SE) const {
+ if (Range.isFullSet()) // Infinite loop.
+ return SE.getCouldNotCompute();
+
+ // If the start is a non-zero constant, shift the range to simplify things.
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
+ if (!SC->getValue()->isZero()) {
+ SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
+ Operands[0] = SE.getConstant(SC->getType(), 0);
+ const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop());
+ if (const SCEVAddRecExpr *ShiftedAddRec =
+ dyn_cast<SCEVAddRecExpr>(Shifted))
+ return ShiftedAddRec->getNumIterationsInRange(
+ Range.subtract(SC->getValue()->getValue()), SE);
+ // This is strange and shouldn't happen.
+ return SE.getCouldNotCompute();
+ }
+
+ // The only time we can solve this is when we have all constant indices.
+ // Otherwise, we cannot determine the overflow conditions.
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (!isa<SCEVConstant>(getOperand(i)))
+ return SE.getCouldNotCompute();
+
+
+ // Okay at this point we know that all elements of the chrec are constants and
+ // that the start element is zero.
+
+ // First check to see if the range contains zero. If not, the first
+ // iteration exits.
+ unsigned BitWidth = SE.getTypeSizeInBits(getType());
+ if (!Range.contains(APInt(BitWidth, 0)))
+ return SE.getConstant(getType(), 0);
+
+ if (isAffine()) {
+ // If this is an affine expression then we have this situation:
+ // Solve {0,+,A} in Range === Ax in Range
+
+ // We know that zero is in the range. If A is positive then we know that
+ // the upper value of the range must be the first possible exit value.
+ // If A is negative then the lower of the range is the last possible loop
+ // value. Also note that we already checked for a full range.
+ APInt One(BitWidth,1);
+ APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+ APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
+
+ // The exit value should be (End+A)/A.
+ APInt ExitVal = (End + A).udiv(A);
+ ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal);
+
+ // Evaluate at the exit value. If we really did fall out of the valid
+ // range, then we computed our trip count, otherwise wrap around or other
+ // things must have happened.
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE);
+ if (Range.contains(Val->getValue()))
+ return SE.getCouldNotCompute(); // Something strange happened
+
+ // Ensure that the previous value is in the range. This is a sanity check.
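+ // For instance (made-up numbers), for {0,+,3} and Range = [0,10):
+ // End = 10 - 1 = 9 and ExitVal = (9 + 3) /u 3 = 4; iteration 4 yields
+ // 12, which is outside the range, while iteration 3 yields 9, inside it.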
+ assert(Range.contains( + EvaluateConstantChrecAtConstant(this, + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && + "Linear scev computation is off in a bad way!"); + return SE.getConstant(ExitValue); + } else if (isQuadratic()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the + // quadratic equation to solve it. To do this, we must frame our problem in + // terms of figuring out when zero is crossed, instead of when + // Range.getUpper() is crossed. + SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); + NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + + // Next, solve the constructed addrec + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + if (R1) { + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. + ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); + + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened + } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened + } + } + } + + return SE.getCouldNotCompute(); +} + + + +//===----------------------------------------------------------------------===// +// SCEVCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void ScalarEvolution::SCEVCallbackVH::deleted() { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(getValPtr()); + // this now dangles! +} + +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + + // Forget all the expressions associated with users of the old value, + // so that future queries will recompute the expressions using the new + // value. + Value *Old = getValPtr(); + SmallVector<User *, 16> Worklist; + SmallPtrSet<User *, 8> Visited; + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + while (!Worklist.empty()) { + User *U = Worklist.pop_back_val(); + // Deleting the Old value will cause this to dangle. 
Postpone + // that until everything else is done. + if (U == Old) + continue; + if (!Visited.insert(U)) + continue; + if (PHINode *PN = dyn_cast<PHINode>(U)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + } + // Delete the Old value. + if (PHINode *PN = dyn_cast<PHINode>(Old)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(Old); + // this now dangles! +} + +ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) + : CallbackVH(V), SE(se) {} + +//===----------------------------------------------------------------------===// +// ScalarEvolution Class Implementation +//===----------------------------------------------------------------------===// + +ScalarEvolution::ScalarEvolution() + : FunctionPass(ID), FirstUnknown(0) { +} + +bool ScalarEvolution::runOnFunction(Function &F) { + this->F = &F; + LI = &getAnalysis<LoopInfo>(); + TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void ScalarEvolution::releaseMemory() { + // Iterate through all the SCEVUnknown instances and call their + // destructors, so that they release their references to their values. + for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) + U->~SCEVUnknown(); + FirstUnknown = 0; + + ValueExprMap.clear(); + BackedgeTakenCounts.clear(); + ConstantEvolutionLoopExitValue.clear(); + ValuesAtScopes.clear(); + UniqueSCEVs.clear(); + SCEVAllocator.Reset(); +} + +void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<DominatorTree>(); +} + +bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { + return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L)); +} + +static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, + const Loop *L) { + // Print all inner loops first + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + PrintLoopInfo(OS, SE, *I); + + OS << "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) + OS << "<multiple exits> "; + + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); + } else { + OS << "Unpredictable backedge-taken count. "; + } + + OS << "\n" + "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + } else { + OS << "Unpredictable max backedge-taken count. "; + } + + OS << "\n"; +} + +void ScalarEvolution::print(raw_ostream &OS, const Module *) const { + // ScalarEvolution's implementation of the print method is to print + // out SCEV values of all instructions that are interesting. Doing + // this potentially causes it to create new SCEV objects though, + // which technically conflicts with the const qualifier. This isn't + // observable from outside the class though, so casting away the + // const isn't dangerous. 
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + + OS << "Classifying expressions for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { + OS << *I << '\n'; + OS << " --> "; + const SCEV *SV = SE.getSCEV(&*I); + SV->print(OS); + + const Loop *L = LI->getLoopFor((*I).getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + } + + if (L) { + OS << "\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!ExitValue->isLoopInvariant(L)) { + OS << "<<Unknown>>"; + } else { + OS << *ExitValue; + } + } + + OS << "\n"; + } + + OS << "Determining loop execution counts for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + PrintLoopInfo(OS, &SE, *I); +} + diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 0000000..93b2a8b --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,164 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. +// +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependencies between different iterations. +// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis + /// implementation that uses ScalarEvolution to answer queries. + class ScalarEvolutionAliasAnalysis : public FunctionPass, + public AliasAnalysis { + ScalarEvolution *SE; + + public: + static char ID; // Class identification, replacement for typeinfo + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + Value *GetBaseValue(const SCEV *S); + }; +} // End of anonymous namespace + +// Register this pass... 
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false);
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+ return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequiredTransitive<ScalarEvolution>();
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+ InitializeAliasAnalysis(this);
+ SE = &getAnalysis<ScalarEvolution>();
+ return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // In an addrec, assume that the base will be in the start, rather
+ // than the step.
+ return GetBaseValue(AR->getStart());
+ } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+ // If there's a pointer operand, it'll be sorted at the end of the list.
+ const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+ if (Last->getType()->isPointerTy())
+ return GetBaseValue(Last);
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // This is a leaf node.
+ return U->getValue();
+ }
+ // No identified object found.
+ return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
+ const Value *B, unsigned BSize) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (ASize == 0 || BSize == 0)
+ return NoAlias;
+
+ // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
+ const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+
+ // If they evaluate to the same expression, it's a MustAlias.
+ if (AS == BS) return MustAlias;
+
+ // If something is known about the difference between the two addresses,
+ // see if it's enough to prove a NoAlias.
+ if (SE->getEffectiveSCEVType(AS->getType()) ==
+ SE->getEffectiveSCEVType(BS->getType())) {
+ unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+ APInt ASizeInt(BitWidth, ASize);
+ APInt BSizeInt(BitWidth, BSize);
+
+ // Compute the difference between the two pointers.
+ const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+ // Test whether the difference is known to be great enough that memory of
+ // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+ // are non-zero, which is special-cased above.
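+ // Numeric sketch (hypothetical values): if A = B + 8 with ASize and
+ // BSize both 4, then AB = 8, so BSizeInt (4) <= umin(AB) (8) and
+ // -ASizeInt (2^BitWidth - 4) >= umax(AB) (8), proving no overlap.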
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) && + (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax())) + return NoAlias; + } + + // If ScalarEvolution can find an underlying object, form a new query. + // The correctness of this depends on ScalarEvolution not recognizing + // inttoptr and ptrtoint operators. + Value *AO = GetBaseValue(AS); + Value *BO = GetBaseValue(BS); + if ((AO && AO != A) || (BO && BO != B)) + if (alias(AO ? AO : A, AO ? UnknownSize : ASize, + BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias) + return NoAlias; + + // Forward the query to the next analysis. + return AliasAnalysis::alias(A, ASize, B, BSize); +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp new file mode 100644 index 0000000..66a06ae --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -0,0 +1,1364 @@ +//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution expander, +// which is used to generate the code corresponding to a given scalar evolution +// expression. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one exists, moving an existing +/// cast if a suitable one exists but isn't in the right place, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // Check to see if there is already a cast! + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (U->getType() == Ty) + if (CastInst *CI = dyn_cast<CastInst>(U)) + if (CI->getOpcode() == Op) { + // If the cast isn't where we want it, fix it. + if (BasicBlock::iterator(CI) != IP) { + // Create a new cast, and leave the old cast in place in case + // it is being used as an insert point. Clear its operand + // so that it doesn't hold anything live. + Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + CI->setOperand(0, UndefValue::get(V->getType())); + rememberInstruction(NewCI); + return NewCI; + } + rememberInstruction(CI); + return CI; + } + } + + // Create a new cast. + Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); + rememberInstruction(I); + return I; +} + +/// InsertNoopCastOfTo - Insert a cast of V to the specified type, +/// which must be possible with a noop cast, doing what we can to share +/// the casts. 
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { + Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); + assert((Op == Instruction::BitCast || + Op == Instruction::PtrToInt || + Op == Instruction::IntToPtr) && + "InsertNoopCastOfTo cannot perform non-noop casts!"); + assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && + "InsertNoopCastOfTo cannot change sizes!"); + + // Short-circuit unnecessary bitcasts. + if (Op == Instruction::BitCast && V->getType() == Ty) + return V; + + // Short-circuit unnecessary inttoptr<->ptrtoint casts. + if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && + SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { + if (CastInst *CI = dyn_cast<CastInst>(V)) + if ((CI->getOpcode() == Instruction::PtrToInt || + CI->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CI->getType()) == + SE.getTypeSizeInBits(CI->getOperand(0)->getType())) + return CI->getOperand(0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if ((CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CE->getType()) == + SE.getTypeSizeInBits(CE->getOperand(0)->getType())) + return CE->getOperand(0); + } + + // Fold a cast of a constant. + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantExpr::getCast(Op, C, Ty); + + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. + if (Argument *A = dyn_cast<Argument>(V)) { + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa<BitCastInst>(IP) && + isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && + cast<BitCastInst>(IP)->getOperand(0) != A) || + isa<DbgInfoIntrinsic>(IP)) + ++IP; + return ReuseOrCreateCast(A, Ty, Op, IP); + } + + // Cast the instruction immediately after the instruction. + Instruction *I = cast<Instruction>(V); + BasicBlock::iterator IP = I; ++IP; + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + IP = II->getNormalDest()->begin(); + while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); +} + +/// InsertBinop - Insert the specified binary operator, doing a small amount +/// of work to avoid inserting an obviously redundant operation. +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS) { + // Fold a binop with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantExpr::get(Opcode, CLHS, CRHS); + + // Do a quick scan to see if we have this binop nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && + IP->getOperand(1) == RHS) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. 
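+ // For instance (hypothetical IR), if the builder points into a loop body
+ // but both LHS and RHS are defined in the entry block, the walk below
+ // moves the insertion point up through each enclosing loop's preheader,
+ // so the binop executes once instead of on every iteration.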
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // If we haven't found this binop, insert it. + Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); + rememberInstruction(BO); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return BO; +} + +/// FactorOutConstant - Test if S is divisible by Factor, using signed +/// division. If so, update S with Factor divided out and return true. +/// S need not be evenly divisible if a reasonable remainder can be +/// computed. +/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made +/// unnecessary; in its place, just signed-divide Ops[i] by the scale and +/// check to see if the divide was folded. +static bool FactorOutConstant(const SCEV *&S, + const SCEV *&Remainder, + const SCEV *Factor, + ScalarEvolution &SE, + const TargetData *TD) { + // Everything is divisible by one. + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getConstant(S->getType(), 1); + return true; + } + + // For a Constant, check for a multiple of the given factor. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + // 0/x == 0. + if (C->isZero()) + return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), + C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. + if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = + SE.getAddExpr(Remainder, + SE.getConstant(C->getValue()->getValue().srem( + FC->getValue()->getValue()))); + return true; + } + } + } + + // In a Mul, check if there is a constant operand which is a multiple + // of the given factor. + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + if (TD) { + // With TargetData, the size is known. Check if there is a constant + // operand which is a multiple of the given factor. If so, we can + // factor it. + const SCEVConstant *FC = cast<SCEVConstant>(Factor); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = + SE.getConstant(C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } else { + // Without TargetData, check if Factor can be factored out of any of the + // Mul's operands. If so, we can just remove it. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *SOp = M->getOperand(i); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); + if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + Remainder->isZero()) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[i] = SOp; + S = SE.getMulExpr(NewMulOps); + return true; + } + } + } + } + + // In an AddRec, check if both start and step are divisible. 
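+ // For example (made-up constants), factoring 4 out of {8,+,12} leaves
+ // {2,+,3} with a zero remainder, while {8,+,10} is rejected because the
+ // step's remainder (10 srem 4 = 2) is non-zero.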
+ if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = A->getStepRecurrence(SE);
+ const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
+ if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+ return false;
+ if (!StepRem->isZero())
+ return false;
+ const SCEV *Start = A->getStart();
+ if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+ return false;
+ S = SE.getAddRecExpr(Start, Step, A->getLoop());
+ return true;
+ }
+
+ return false;
+}
+
+/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
+/// is the number of SCEVAddRecExprs present, which are kept at the end of
+/// the list.
+///
+static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ unsigned NumAddRecs = 0;
+ for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
+ ++NumAddRecs;
+ // Group Ops into non-addrecs and addrecs.
+ SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
+ SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
+ // Let ScalarEvolution sort and simplify the non-addrecs list.
+ const SCEV *Sum = NoAddRecs.empty() ?
+ SE.getConstant(Ty, 0) :
+ SE.getAddExpr(NoAddRecs);
+ // If it returned an add, use the operands. Otherwise it simplified
+ // the sum into a single value, so just use that.
+ Ops.clear();
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
+ Ops.append(Add->op_begin(), Add->op_end());
+ else if (!Sum->isZero())
+ Ops.push_back(Sum);
+ // Then append the addrecs.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+ const Type *Ty,
+ ScalarEvolution &SE) {
+ // Find the addrecs.
+ SmallVector<const SCEV *, 8> AddRecs;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+ const SCEV *Start = A->getStart();
+ if (Start->isZero()) break;
+ const SCEV *Zero = SE.getConstant(Ty, 0);
+ AddRecs.push_back(SE.getAddRecExpr(Zero,
+ A->getStepRecurrence(SE),
+ A->getLoop()));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+ Ops[i] = Zero;
+ Ops.append(Add->op_begin(), Add->op_end());
+ e += Add->getNumOperands();
+ } else {
+ Ops[i] = Start;
+ }
+ }
+ if (!AddRecs.empty()) {
+ // Add the addrecs onto the end of the list.
+ Ops.append(AddRecs.begin(), AddRecs.end());
+ // Resort the operand list, moving any constants to the front.
+ SimplifyAddOperands(Ops, Ty, SE);
+ }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
+///
+/// Design note: It might seem desirable for this function to be more
+/// loop-aware.
If some of the indices are loop-invariant while others +/// aren't, it might seem desirable to emit multiple GEPs, keeping the +/// loop-invariant portions of the overall computation outside the loop. +/// However, there are a few reasons this is not done here. Hoisting simple +/// arithmetic is a low-level optimization that often isn't very +/// important until late in the optimization process. In fact, passes +/// like InstructionCombining will combine GEPs, even if it means +/// pushing loop-invariant computation down into loops, so even if the +/// GEPs were split here, the work would quickly be undone. The +/// LoopStrengthReduction pass, which is usually run quite late (and +/// after the last InstructionCombining pass), takes care of hoisting +/// loop-invariant portions of expressions, after considering what +/// can be folded using target addressing modes. +/// +Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, + const PointerType *PTy, + const Type *Ty, + Value *V) { + const Type *ElTy = PTy->getElementType(); + SmallVector<Value *, 4> GepIndices; + SmallVector<const SCEV *, 8> Ops(op_begin, op_end); + bool AnyNonZeroIndices = false; + + // Split AddRecs up into parts as either of the parts may be usable + // without the other. + SplitAddRecs(Ops, Ty, SE); + + // Descend down the pointer's type and attempt to convert the other + // operands into GEP indices, at each level. The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + for (;;) { + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector<const SCEV *, 8> ScaledOps; + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } + } + } + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). + Value *Scaled = ScaledOps.empty() ? + Constant::getNullValue(Ty) : + expandCodeFor(SE.getAddExpr(ScaledOps), Ty); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + if (SE.TD) { + // With TargetData, field offsets are known. See if a constant offset + // falls within any of the struct fields. 
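+ // For example (hypothetical struct), with ElTy = { i32, i32 } and a
+ // constant operand of 4, the offset lands in field 1, so index 1 is
+ // recorded and the leftover offset becomes 4 - 4 = 0.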
+ if (Ops.empty()) break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *SE.TD->getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); + AnyNonZeroIndices = true; + FoundFieldNo = true; + } + } + } else { + // Without TargetData, just check for an offsetof expression of the + // appropriate struct type. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); + Ops[i] = SE.getConstant(Ty, 0); + AnyNonZeroIndices = true; + FoundFieldNo = true; + break; + } + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); + } + } + + if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + ElTy = ATy->getElementType(); + else + break; + } + + // If none of the operands were convertible to proper GEP indices, cast + // the base to i8* and do an ugly getelementptr with that. It's still + // better than ptrtoint+arithmetic+inttoptr at least. + if (!AnyNonZeroIndices) { + // Cast the base to i8*. + V = InsertNoopCastOfTo(V, + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + // Expand the operands for a plain byte offset. + Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); + + // Fold a GEP with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(V)) + if (Constant *CRHS = dyn_cast<Constant>(Idx)) + return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + + // Do a quick scan to see if we have this GEP nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + if (IP->getOpcode() == Instruction::GetElementPtr && + IP->getOperand(0) == V && IP->getOperand(1) == Idx) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Emit a GEP. 
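+ // The result is a raw byte-offset GEP, e.g. (illustrative IR)
+ //   %uglygep = getelementptr i8* %base, i64 %offset
+ // which alias analysis can still reason about, unlike an equivalent
+ // ptrtoint+add+inttoptr sequence.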
+ Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return GEP; + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. + Value *Casted = V; + if (V->getType() != PTy) + Casted = InsertNoopCastOfTo(Casted, PTy); + Value *GEP = Builder.CreateGEP(Casted, + GepIndices.begin(), + GepIndices.end(), + "scevgep"); + Ops.push_back(SE.getUnknown(GEP)); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return expand(SE.getAddExpr(Ops)); +} + +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +static bool isNonConstantNegative(const SCEV *F) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} + +/// GetRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. 
+static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, + DominatorTree &DT) { + if (isa<SCEVConstant>(S)) + return 0; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) + return LI.getLoopFor(I->getParent()); + return 0; + } + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { + const Loop *L = 0; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + L = AR->getLoop(); + for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); + I != E; ++I) + L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT); + return L; + } + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return GetRelevantLoop(C->getOperand(), LI, DT); + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) + return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT), + GetRelevantLoop(D->getRHS(), LI, DT), + DT); + llvm_unreachable("Unexpected SCEV type!"); +} + +namespace { + +/// LoopCompare - Compare loops by PickMostRelevantLoop. +class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair<const Loop *, const SCEV *> LHS, + std::pair<const Loop *, const SCEV *> RHS) const { + // Keep pointer operands sorted at the end. + if (LHS.second->getType()->isPointerTy() != + RHS.second->getType()->isPointerTy()) + return LHS.second->getType()->isPointerTy(); + + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (isNonConstantNegative(LHS.second)) { + if (!isNonConstantNegative(RHS.second)) + return false; + } else if (isNonConstantNegative(RHS.second)) + return true; + + // Otherwise they are equivalent according to this comparison. + return false; + } +}; + +} + +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), + *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. 
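+ // For example (hypothetical operands), if Sum has already been expanded
+ // as %p and the remaining operands at this loop level describe a byte
+ // offset, they become GEP indices off %p rather than integer adds.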
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is a SCEVUnknown and not an instruction, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr. If the running sum is instructions,
+ // use a SCEVUnknown to avoid re-analyzing them.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
+ SE.getSCEV(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
+ Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
+ } else {
+ // A simple add.
+ Value *W = expandCodeFor(Op, Ty);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
+ }
+ }
+
+ return Sum;
+}
+
+Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
+ *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
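+ // E.g. (illustrative) a product such as 4 * %x is emitted as "mul %x, 4",
+ // the canonical constant-on-the-right form that instcombine also produces.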
+ if (isa<Constant>(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } + } + + return Prod; +} + +Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + Value *LHS = expandCodeFor(S->getLHS(), Ty); + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { + const APInt &RHS = SC->getValue()->getValue(); + if (RHS.isPowerOf2()) + return InsertBinop(Instruction::LShr, LHS, + ConstantInt::get(Ty, RHS.logBase2())); + } + + Value *RHS = expandCodeFor(S->getRHS(), Ty); + return InsertBinop(Instruction::UDiv, LHS, RHS); +} + +/// Move parts of Base into Rest to leave Base with the minimal +/// expression that provides a pointer operand suitable for a +/// GEP expansion. +static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, + ScalarEvolution &SE) { + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { + Base = A->getStart(); + Rest = SE.getAddExpr(Rest, + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), + A->getStepRecurrence(SE), + A->getLoop())); + } + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { + Base = A->getOperand(A->getNumOperands()-1); + SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); + NewAddOps.back() = Rest; + Rest = SE.getAddExpr(NewAddOps); + ExposePointerBase(Base, Rest, SE); + } +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. +PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + const Type *ExpandTy, + const Type *IntTy) { + // Reuse a previously-inserted PHI, if present. + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + IncV = 0; + break; + } + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) { + IncV = 0; + break; + } + if (!IncV) + break; + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. 
But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Expand code for the start value. + Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getHeader()->begin()); + + // Expand code for the step value. Insert instructions right before the + // terminator corresponding to the back-edge. Do this before creating the PHI + // so that PHI reuse code doesn't see an incomplete PHI. If the stride is + // negative, insert a sub instead of an add for the increment (unless it's a + // constant, because subtracts of constants are canonicalized to adds). + const SCEV *Step = Normalized->getStepRecurrence(SE); + bool isPointer = ExpandTy->isPointerTy(); + bool isNegative = !isPointer && isNonConstantNegative(Step); + if (isNegative) + Step = SE.getNegativeSCEV(Step); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + + // Create the PHI. + Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin()); + PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv"); + rememberInstruction(PN); + + // Create the step instructions and populate the PHI. + BasicBlock *Header = L->getHeader(); + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. + if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. If IVIncInsertLoop is + // non-null and equal to the addrec's loop, insert the instructions + // at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ? + IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos->getParent(), InsertPos); + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (isPointer) { + const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp"); + rememberInstruction(IncV); + } + } else { + IncV = isNegative ? + Builder.CreateSub(PN, StepV, "lsr.iv.next") : + Builder.CreateAdd(PN, StepV, "lsr.iv.next"); + rememberInstruction(IncV); + } + PN->addIncoming(IncV, Pred); + } + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + + return PN; +} + +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + const Type *STy = S->getType(); + const Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. 
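// A standalone model (illustrative C++) of the recurrence the PHI built
// above represents: an addrec {Start,+,Step} evaluates to Start + n*Step on
// iteration n, whether the increment is emitted as an add or, for a negative
// non-constant step, as a subtract of the negated step.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Start = 7, Step = 3, Trips = 10;
  uint64_t IV = Start;                     // the "lsr.iv" PHI
  for (uint64_t N = 0; N < Trips; ++N) {
    assert(IV == Start + N * Step);        // closed form of {Start,+,Step}
    IV = IV + Step;                        // the "lsr.iv.next" increment
  }
  assert(IV == Start + Trips * Step);      // post-incremented value
  return 0;
}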
+ const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = 0; + if (!Start->properlyDominates(L->getHeader(), SE.DT)) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = + cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, + Normalized->getStepRecurrence(SE), + Normalized->getLoop())); + } + + // Strip off any non-loop-dominating component from the addrec step. + const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = 0; + if (!Step->dominates(L->getHeader(), SE.DT)) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + Normalized = + cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, + Normalized->getLoop())); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + const Type *ExpandTy = PostLoopScale ? IntTy : STy; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + + // Accommodate post-inc mode, if necessary. + Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeFor(PostLoopScale, IntTy)); + rememberInstruction(Result); + } + + // Re-apply any non-loop-dominating offset. + if (PostLoopOffset) { + if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + const SCEV *const OffsetArray[1] = { PostLoopOffset }; + Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd(Result, + expandCodeFor(PostLoopOffset, IntTy)); + rememberInstruction(Result); + } + } + + return Result; +} + +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + if (!CanonicalMode) return expandAddRecExprLiterally(S); + + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = 0; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow. 
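// Illustrative arithmetic (standalone C++, not the LLVM API) for the
// post-inc handling above: normalizing an addrec subtracts one step, and
// reading the post-incremented value of the normalized recurrence adds one
// step back, so the net result equals the original expression.
#include <cassert>
#include <cstdint>

static uint64_t AddRecAt(uint64_t Start, uint64_t Step, uint64_t N) {
  return Start + N * Step;  // value of {Start,+,Step} at iteration N
}

int main() {
  const uint64_t Start = 100, Step = 4;
  for (uint64_t N = 0; N < 8; ++N) {
    // Normalize: conceptually {Start,+,Step} - Step = {Start-Step,+,Step}.
    uint64_t NormStart = Start - Step;
    // A post-inc use of the normalized addrec reads the next iteration.
    uint64_t PostInc = AddRecAt(NormStart, Step, N + 1);
    assert(PostInc == AddRecAt(Start, Step, N));  // round-trips exactly
  }
  return 0;
}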
+ if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > + SE.getTypeSizeInBits(Ty)) { + SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BasicBlock::iterator NewInsertPt = + llvm::next(BasicBlock::iterator(cast<Instruction>(V))); + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt)) + ++NewInsertPt; + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + NewInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; + } + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); + NewOps[0] = SE.getConstant(Ty, 0); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. + const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + } + } + + // Just do a normal add. Pre-expand the operands to suppress folding. + return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())), + SE.getUnknown(expand(Rest)))); + } + + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { + // Create and insert the PHI node for the induction variable in the + // specified loop. + BasicBlock *Header = L->getHeader(); + CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin()); + rememberInstruction(CanonicalIV); + + Constant *One = ConstantInt::get(Ty, 1); + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); + } else { + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); + } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + + // {0,+,F} --> {0,+,1} * F + + // If this is a simple linear addrec, emit it now as a special case. 
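// A quick standalone check (illustrative C++) of the two identities used
// above: {X,+,F} splits as X + {0,+,F}, and the canonical IV {0,+,1} simply
// counts iterations, so a zero-based affine addrec is just CanonicalIV * F.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t X = 42, F = 5;
  uint64_t CanonicalIV = 0;  // {0,+,1}: the "indvar" PHI
  uint64_t ZeroBased = 0;    // {0,+,F}
  for (int N = 0; N < 6; ++N) {
    assert(X + ZeroBased == X + CanonicalIV * F);  // {X,+,F} == X + {0,+,F}
    ZeroBased += F;                                // step {0,+,F}
    CanonicalIV += 1;                              // step {0,+,1}
  }
  return 0;
}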
+ if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(CanonicalIV), + SE.getNoopOrAnyExtend(S->getOperand(1), + CanonicalIV->getType())), + Ty)); + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form. This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. + + // Promote S up to the canonical IV type, if the cast is foldable. + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); + if (isa<SCEVAddRecExpr>(Ext)) + NewS = Ext; + + const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + + // Truncate the result down to the original type, if needed. + const SCEV *T = SE.getTruncateOrNoop(V, Ty); + return expand(T); +} + +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateTrunc(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateZExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateSExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. 
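// A standalone sketch (plain C++) of the expansion the smax/umax visitors
// produce: an n-ary max lowers to a chain of compare+select pairs, folding
// the operand list from right to left exactly as the loops above do. The
// mixed pointer/integer casting in the real code is elided here.
#include <algorithm>
#include <cassert>
#include <vector>

static int ExpandSMax(const std::vector<int> &Ops) {
  int LHS = Ops.back();  // mirrors expand(S->getOperand(N-1))
  for (int i = (int)Ops.size() - 2; i >= 0; --i) {
    int RHS = Ops[i];
    bool ICmp = LHS > RHS;   // CreateICmpSGT
    LHS = ICmp ? LHS : RHS;  // CreateSelect(..., "smax")
  }
  return LHS;
}

int main() {
  std::vector<int> Ops = {3, -7, 12, 0};
  assert(ExpandSMax(Ops) == *std::max_element(Ops.begin(), Ops.end()));
  return 0;
}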
+ if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, + Instruction *I) { + BasicBlock::iterator IP = I; + while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) + ++IP; + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { + // Expand the code for this SCEV. + Value *V = expand(SH); + if (Ty) { + assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && + "non-trivial casts should be done with the SCEVs directly!"); + V = InsertNoopCastOfTo(V, Ty); + } + return V; +} + +Value *SCEVExpander::expand(const SCEV *S) { + // Compute an insertion point for this SCEV object. Hoist the instructions + // as far out in the loop nest as possible. + Instruction *InsertPt = Builder.GetInsertPoint(); + for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + L = L->getParentLoop()) + if (S->isLoopInvariant(L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) + InsertPt = L->getHeader()->getFirstNonPHI(); + while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) + InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + break; + } + + // Check to see if we already expanded this here. + std::map<std::pair<const SCEV *, Instruction *>, + AssertingVH<Value> >::iterator I = + InsertedExpressions.find(std::make_pair(S, InsertPt)); + if (I != InsertedExpressions.end()) + return I->second; + + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + + // Expand the expression into instructions. + Value *V = visit(S); + + // Remember the expanded value for this SCEV at this location. + if (PostIncLoops.empty()) + InsertedExpressions[std::make_pair(S, InsertPt)] = V; + + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; +} + +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else + InsertedValues.insert(I); + + // If we just claimed an existing instruction and that instruction had + // been the insert point, adjust the insert point forward so that + // subsequently inserted code will be dominated. 
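// A standalone model (illustrative C++) of the hoisting walk in expand():
// starting at the innermost loop containing the insertion point, keep moving
// the insertion point out through enclosing preheaders while the expression
// stays loop-invariant, so the value is computed at the outermost legal spot.
#include <cassert>
#include <vector>

// Invariant[d] says whether the expression is invariant in the loop at
// nesting depth d (0 = outermost). Returns the depth to insert at, where
// depth d means "in the preheader of the loop at depth d".
static int ChooseInsertDepth(const std::vector<bool> &Invariant) {
  int Depth = (int)Invariant.size();  // start at the innermost position
  while (Depth > 0 && Invariant[Depth - 1])
    --Depth;                          // hoist into the enclosing preheader
  return Depth;
}

int main() {
  // Invariant in the two inner loops but not the outer one: hoist two levels.
  assert(ChooseInsertDepth({false, true, true}) == 1);
  // Invariant everywhere: hoist all the way out (depth 0).
  assert(ChooseInsertDepth({true, true, true}) == 0);
  return 0;
}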
+ if (Builder.GetInsertPoint() == I) { + BasicBlock::iterator It = cast<Instruction>(I); + do { ++It; } while (isInsertedInstruction(It) || + isa<DbgInfoIntrinsic>(It)); + Builder.SetInsertPoint(Builder.GetInsertBlock(), It); + } +} + +void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { + // If we acquired more instructions since the old insert point was saved, + // advance past them. + while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I; + + Builder.SetInsertPoint(BB, I); +} + +/// getOrInsertCanonicalInductionVariable - This method returns the +/// canonical induction variable of the specified type for the specified +/// loop (inserting one if there is none). A canonical induction variable +/// starts at zero and steps by one on each iteration. +PHINode * +SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, + const Type *Ty) { + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}<L>. + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L); + + // Emit code for it. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return V; +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 0000000..ac36cef --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,183 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. 
These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN || !Operand) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the + // preincremented value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + + // Okay, all uses of Operand by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + SmallVector<const SCEV *, 8> Operands; + const Loop *L = AR->getLoop(); + // The addrec conceptually uses its operands at loop entry. + Instruction *LUser = L->getHeader()->begin(); + // Transform each operand. + for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); + Operands.push_back(N); + } + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + Loops.insert(L); + } +#if 0 + // This assert is conceptually correct, but ScalarEvolution currently + // sometimes fails to canonicalize two equal SCEVs to exactly the same + // form. It's possibly a pessimization when this happens, but it isn't a + // correctness problem, so disable this assert for now. + assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Normalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + } +#if 0 + // See the comment on the assert above. 
+ assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Denormalize: + if (Loops.count(L)) + Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE); + break; + } + return Result; + } + + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + // Transform each operand. + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + // If any operand actually changed, return a transformed result. + if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp new file mode 100644 index 0000000..d8c207b --- /dev/null +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -0,0 +1,347 @@ +//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an abstract sparse conditional propagation algorithm, +// modeled after SCCP, but with a customizable lattice function. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparseprop" +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +AbstractLatticeFunction::~AbstractLatticeFunction() {} + +/// PrintValue - Render the specified lattice value to the specified stream. 
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +/// getOrInitValueState - Return the LatticeVal object that corresponds to the +/// value, initializing the value's state if it hasn't been entered into the +/// map yet. This function is necessary because not all values should start +/// out in the underdefined state... Arguments should be overdefined, and +/// constants should be marked as constants. +/// +SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V); + if (I != ValueState.end()) return I->second; // Common case, in the map + + LatticeVal LV; + if (LatticeFunc->IsUntrackedValue(V)) + return LatticeFunc->getUntrackedVal(); + else if (Constant *C = dyn_cast<Constant>(V)) + LV = LatticeFunc->ComputeConstant(C); + else if (Argument *A = dyn_cast<Argument>(V)) + LV = LatticeFunc->ComputeArgument(A); + else if (!isa<Instruction>(V)) + // All other non-instructions are overdefined. + LV = LatticeFunc->getOverdefinedVal(); + else + // All instructions are underdefined by default. + LV = LatticeFunc->getUndefVal(); + + // If this value is untracked, don't add it to the map. + if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[V] = LV; +} + +/// UpdateState - When the state for some instruction is potentially updated, +/// this function notices and adds I to the worklist if needed. +void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst); + if (I != ValueState.end() && I->second == V) + return; // No change. + + // An update. Visit uses of I. + ValueState[&Inst] = V; + InstWorkList.push_back(&Inst); +} + +/// MarkBlockExecutable - This method can be used by clients to mark all of +/// the blocks that are known to be intrinsically live in the processed unit. +void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBExecutable.insert(BB); // Basic block is executable! + BBWorkList.push_back(BB); // Add the block to the work list! +} + +/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB +/// work list if it is not already executable... +void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I) + visitPHINode(*cast<PHINode>(I)); + + } else { + MarkBlockExecutable(Dest); + } +} + + +/// getFeasibleSuccessors - Return a vector of booleans to indicate which +/// successors are reachable from a given terminator instruction. 
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, + SmallVectorImpl<bool> &Succs, + bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) return; + + if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = getOrInitValueState(BI->getCondition()); + else + BCValue = getLatticeState(BI->getCondition()); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. + if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); + if (C == 0 || !isa<ConstantInt>(C)) { + // Non-constant values can go either way. + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C->isNullValue()] = true; + return; + } + + if (isa<InvokeInst>(TI)) { + // Invoke instructions successors are always executable. + // TODO: Could ask the lattice function if the value can throw. + Succs[0] = Succs[1] = true; + return; + } + + if (isa<IndirectBrInst>(TI)) { + Succs.assign(Succs.size(), true); + return; + } + + SwitchInst &SI = cast<SwitchInst>(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getOrInitValueState(SI.getCondition()); + else + SCValue = getLatticeState(SI.getCondition()); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); + if (C == 0 || !isa<ConstantInt>(C)) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true; +} + + +/// isEdgeFeasible - Return true if the control flow edge from the 'From' +/// basic block to the 'To' basic block is currently feasible... +bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, + bool AggressiveUndef) { + SmallVector<bool, 16> SuccFeasible; + TerminatorInst *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SparseSolver::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. 
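// A standalone distillation (plain C++) of getFeasibleSuccessors for a
// two-way conditional branch: an undefined condition makes neither edge
// feasible yet, a known constant enables exactly one edge, and an
// overdefined or untracked condition enables both.
#include <cassert>

enum LatticeVal { Undefined, ConstTrue, ConstFalse, Overdefined };

struct Succs { bool TrueEdge, FalseEdge; };

static Succs FeasibleSuccessors(LatticeVal Cond) {
  switch (Cond) {
  case Undefined:  return {false, false};  // neither feasible yet
  case ConstTrue:  return {true, false};
  case ConstFalse: return {false, true};
  default:         return {true, true};    // overdefined: can go either way
  }
}

int main() {
  assert(!FeasibleSuccessors(Undefined).TrueEdge);
  assert(FeasibleSuccessors(ConstFalse).FalseEdge &&
         !FeasibleSuccessors(ConstFalse).TrueEdge);
  assert(FeasibleSuccessors(Overdefined).TrueEdge &&
         FeasibleSuccessors(Overdefined).FalseEdge);
  return 0;
}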
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(PN, IV); + return; + } + + LatticeVal PNIV = getOrInitValueState(&PN); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common) just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(PN, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i)); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the compute value, which is the merge of the inputs. + UpdateState(PN, PNIV); +} + + +void SparseSolver::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic, they are never passed into the + // transfer functions. + if (PHINode *PN = dyn_cast<PHINode>(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(I, IV); + + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I)) + visitTerminatorInst(*TI); +} + +void SparseSolver::Solve(Function &F) { + MarkBlockExecutable(&F.getEntryBlock()); + + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty()) { + // Process the instruction work list. + while (!InstWorkList.empty()) { + Instruction *I = InstWorkList.back(); + InstWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n"); + + // "I" got into the work list because it made a transition. See if any + // users are both live and in need of updating. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (BBExecutable.count(U->getParent())) // Inst is executable? + visitInst(*U); + } + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); + + // Notify all instructions in this basic block that they are newly + // executable. 
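// A standalone sketch (plain C++) of the PHI merge above: fold the lattice
// values of operands on feasible edges only, with "overdefined" absorbing so
// the loop can stop early, just like the break in visitPHINode. The lattice
// and merge rule here are assumed stand-ins for the client's LatticeFunc.
#include <cassert>
#include <vector>

enum LatticeVal { Undefined, ConstOne, ConstTwo, Overdefined };

static LatticeVal Merge(LatticeVal A, LatticeVal B) {
  if (A == Undefined) return B;
  if (B == Undefined) return A;
  return A == B ? A : Overdefined;  // unequal constants become overdefined
}

int main() {
  struct Incoming { LatticeVal V; bool EdgeFeasible; };
  std::vector<Incoming> Ins = {
      {ConstTwo, false},  // infeasible edge: ignored entirely
      {ConstOne, true},
      {ConstOne, true},
  };
  LatticeVal PNIV = Undefined;
  for (const Incoming &In : Ins) {
    if (!In.EdgeFeasible) continue;  // isEdgeFeasible(...) returned false
    PNIV = Merge(PNIV, In.V);
    if (PNIV == Overdefined) break;  // rest of the inputs don't matter
  }
  assert(PNIV == ConstOne);  // the infeasible ConstTwo edge didn't pollute it
  return 0;
}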
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + visitInst(*I); + } + } +} + +void SparseSolver::Print(Function &F, raw_ostream &OS) const { + OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BBExecutable.count(BB)) + OS << "INFEASIBLE: "; + OS << "\t"; + if (BB->hasName()) + OS << BB->getNameStr() << ":\n"; + else + OS << "; anon bb\n"; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + LatticeFunc->PrintValue(getLatticeState(I), OS); + OS << *I << "\n"; + } + + OS << "\n"; + } +} + diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp new file mode 100644 index 0000000..68a39cd --- /dev/null +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -0,0 +1,51 @@ +//===- Trace.cpp - Implementation of Trace class --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a single trace of LLVM basic blocks. A trace is a +// single entry, multiple exit, region of code that is often hot. Trace-based +// optimizations treat traces almost like they are a large, strange, basic +// block: because the trace path is assumed to be hot, optimizations for the +// fall-through path are made at the expense of the non-fall-through paths. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Trace.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +Function *Trace::getFunction() const { + return getEntryBasicBlock()->getParent(); +} + +Module *Trace::getModule() const { + return getFunction()->getParent(); +} + +/// print - Write trace to output stream. +/// +void Trace::print(raw_ostream &O) const { + Function *F = getFunction(); + O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; + for (const_iterator i = begin(), e = end(); i != e; ++i) { + O << "; "; + WriteAsOperand(O, *i, true, getModule()); + O << "\n"; + } + O << "; Trace parent function: \n" << *F; +} + +/// dump - Debugger convenience method; writes trace to standard error +/// output stream. +/// +void Trace::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp new file mode 100644 index 0000000..bbfdcec --- /dev/null +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -0,0 +1,191 @@ +//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TypeBasedAliasAnalysis pass, which implements +// metadata-based TBAA. +// +// In LLVM IR, memory does not have types, so LLVM's own type system is not +// suitable for doing TBAA. Instead, metadata is added to the IR to describe +// a type system of a higher level language. +// +// This pass is language-independent. The type system is encoded in +// metadata. 
This allows this pass to support typical C and C++ TBAA, but +// it can also support custom aliasing behavior for other languages. +// +// This is a work-in-progress. It doesn't work yet, and the metadata +// format isn't stable. +// +// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to +// be extended. +// TODO: AA chaining +// TODO: struct fields +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// TBAANode - This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAANode { + const MDNode *Node; + + public: + TBAANode() : Node(0) {} + explicit TBAANode(MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } + + /// getParent - Get this TBAANode's Alias DAG parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast<MDNode>(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } + + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); + if (!CI) + return false; + // TODO: Think about the encoding. + return CI->isOne(); + } + }; +} + +namespace { + /// TypeBasedAliasAnalysis - This is a simple alias analysis + /// implementation that uses TypeBased to answer queries. + class TypeBasedAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + TypeBasedAliasAnalysis() : ImmutablePass(ID) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + virtual bool pointsToConstantMemory(const Value *P); + }; +} // End of anonymous namespace + +// Register this pass... +char TypeBasedAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", + "Type-Based Alias Analysis", false, true, false); + +ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { + return new TypeBasedAliasAnalysis(); +} + +void +TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize, + const Value *B, unsigned BSize) { + // Currently, metadata can only be attached to Instructions. 
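// A standalone model (plain C++, not the MDNode encoding) of the TBAA query
// implemented below: climb each tag's parent chain toward its root; if either
// tag is an ancestor of the other they may alias, if both chains end at the
// same root with no ancestry the types are provably distinct (NoAlias), and
// different roots force a conservative MayAlias.
#include <cassert>

struct TBAANode { const TBAANode *Parent; };  // Parent == nullptr at a root

static bool IsAncestor(const TBAANode *A, const TBAANode *B) {
  for (const TBAANode *T = B; T; T = T->Parent)
    if (T == A) return true;
  return false;
}

static const TBAANode *RootOf(const TBAANode *N) {
  while (N->Parent) N = N->Parent;
  return N;
}

static bool MayAlias(const TBAANode *A, const TBAANode *B) {
  if (IsAncestor(A, B) || IsAncestor(B, A)) return true;
  return RootOf(A) != RootOf(B);  // same root, no ancestry => NoAlias
}

int main() {
  TBAANode Root{nullptr}, Int{&Root}, Float{&Root};
  TBAANode OtherRoot{nullptr}, X{&OtherRoot};
  assert(!MayAlias(&Int, &Float));  // siblings under one root: NoAlias
  assert(MayAlias(&Root, &Int));    // ancestor relationship: MayAlias
  assert(MayAlias(&Int, &X));       // unrelated type systems: conservative
  return 0;
}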
+ const Instruction *AI = dyn_cast<Instruction>(A); + if (!AI) return MayAlias; + const Instruction *BI = dyn_cast<Instruction>(B); + if (!BI) return MayAlias; + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + MDNode *AM = + AI->getMetadata(AI->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!AM) return MayAlias; + MDNode *BM = + BI->getMetadata(BI->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!BM) return MayAlias; + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the DAG from A to see if we reach B. + for (TBAANode T(AM); ; ) { + if (T.getNode() == BM) + // B is an ancestor of A. + return MayAlias; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the DAG from B to see if we reach A. + for (TBAANode T(BM); ; ) { + if (T.getNode() == AM) + // A is an ancestor of B. + return MayAlias; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have the same root, then we've proved there's no alias. + if (RootA.getNode() == RootB.getNode()) + return NoAlias; + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + return MayAlias; +} + +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) { + // Currently, metadata can only be attached to Instructions. + const Instruction *I = dyn_cast<Instruction>(P); + if (!I) return false; + + MDNode *M = + I->getMetadata(I->getParent()->getParent()->getParent() + ->getMDKindID("tbaa")); + if (!M) return false; + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. + return TBAANode(M).TypeIsImmutable(); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp new file mode 100644 index 0000000..181c9b0 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -0,0 +1,1388 @@ +//===- ValueTracking.cpp - Walk computations to compute properties --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help analyze properties that chains of +// computations have. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <cstring> +using namespace llvm; + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bit sets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. 
If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + const TargetData *TD, unsigned Depth) { + const unsigned MaxDepth = 6; + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = Mask.getBitWidth(); + assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) + && "Not integer or pointer type!"); + assert((!TD || + TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, Mask, KnownOne and KnownZero should have same BitWidth"); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + } + // Null and aggregate-zero are all-zeros. + if (isa<ConstantPointerNull>(V) || + isa<ConstantAggregateZero>(V)) { + KnownOne.clear(); + KnownZero = Mask; + return; + } + // Handle a constant vector by taking the intersection of the known bits of + // each element. + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + KnownZero.set(); KnownOne.set(); + for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, + TD, Depth); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + } + return; + } + // The address of an aligned GlobalValue has trailing zeros. + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + unsigned Align = GV->getAlignment(); + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { + const Type *ObjectType = GV->getType()->getElementType(); + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GV->isDeclaration() && !GV->mayBeOverridden()) + Align = TD->getPrefTypeAlignment(ObjectType); + else + Align = TD->getABITypeAlignment(ObjectType); + } + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + else + KnownZero.clear(); + KnownOne.clear(); + return; + } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clear(); KnownOne.clear(); + } else { + ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, + TD, Depth+1); + } + return; + } + + KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. + + if (Depth == MaxDepth || Mask == 0) + return; // Limit search depth. 
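// A standalone illustration (plain C++, uint32_t in place of APInt) of the
// base cases above: for a constant every bit is known, and for a pointer
// with alignment A the low log2(A) bits are known zero.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Mask = 0xFFFFFFFFu;

  // Constant: KnownOne is the value itself, KnownZero its complement.
  uint32_t C = 0x000000F0u;
  uint32_t KnownOne = C & Mask, KnownZero = ~C & Mask;
  assert((KnownOne & KnownZero) == 0);     // a bit is never both one and zero
  assert((KnownOne | KnownZero) == Mask);  // every masked bit is known

  // 16-byte-aligned address: the four trailing bits are known zero.
  uint32_t Align = 16;
  uint32_t AlignKnownZero = Align - 1;     // the low log2(Align) bits
  uint32_t Addr = 0x1000u;                 // some 16-byte-aligned address
  assert((Addr & AlignKnownZero) == 0);
  return 0;
}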
+
+  Operator *I = dyn_cast<Operator>(V);
+  if (!I) return;
+
+  APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::And: {
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    APInt Mask2(Mask & ~KnownZero);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  }
+  case Instruction::Or: {
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    APInt Mask2(Mask & ~KnownOne);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 bits are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  }
+  case Instruction::Xor: {
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case Instruction::Mul: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clear();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+                              KnownZero2.countLeadingOnes(),
+                              BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case Instruction::UDiv: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + return; // Can't work with floating point. + case Instruction::PtrToInt: + case Instruction::IntToPtr: + // We can't handle these if we don't know the pointer size. + if (!TD) return; + // FALL THROUGH and handle them the same as zext/trunc. + case Instruction::ZExt: + case Instruction::Trunc: { + const Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; + // Note that we handle pointer operands here because of inttoptr/ptrtoint + // which fall through here. + if (SrcTy->isPointerTy()) + SrcBitWidth = TD->getTypeSizeInBits(SrcTy); + else + SrcBitWidth = SrcTy->getScalarSizeInBits(); + + APInt MaskIn(Mask); + MaskIn.zextOrTrunc(SrcBitWidth); + KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne.zextOrTrunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + KnownZero.zextOrTrunc(BitWidth); + KnownOne.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::BitCast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + // TODO: For now, not handling conversions like: + // (bitcast i64 %x to <2 x i32>) + !I->getType()->isVectorTy()) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + return; + } + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt MaskIn(Mask); + MaskIn.trunc(SrcBitWidth); + KnownZero.trunc(SrcBitWidth); + KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero.zext(BitWidth); + KnownOne.zext(BitWidth); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
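// A standalone check (plain C++, two's complement assumed) of the
// sign-extension rule above: if the source's sign bit is known, all of the
// widened bits become known with that same value.
#include <cassert>
#include <cstdint>

int main() {
  // Sign bit known zero: sext behaves like zext, high bits known zero.
  int8_t Small = 0x35;  // sign bit clear
  uint32_t Wide = (uint32_t)(int32_t)Small;
  assert((Wide & 0xFFFFFF00u) == 0);

  // Sign bit known one: all widened bits become known one.
  int8_t Neg = (int8_t)0x85;  // sign bit set
  uint32_t WideNeg = (uint32_t)(int32_t)Neg;
  assert((WideNeg & 0xFFFFFF00u) == 0xFFFFFF00u);
  return 0;
}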
+ if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set + KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::Shl: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt Mask2(Mask.lshr(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 + return; + } + break; + case Instruction::LShr: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + // high bits known zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + return; + } + break; + case Instruction::AShr: + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Signed shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. + KnownZero |= HighBits; + else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. + KnownOne |= HighBits; + return; + } + break; + case Instruction::Sub: { + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (!CLHS->getValue().isNegative()) { + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, + TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case Instruction::Add: { + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. 
For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (I->getOpcode() == Instruction::Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + return; + } + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + } + } + break; + case Instruction::URem: { + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. 
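+    // Editor's example: for i8 operands, an LHS known to be <= 63 (2
+    // leading zeros) and an RHS known to be <= 7 (5 leading zeros) give a
+    // remainder < 8, so std::max below yields 5 known leading zero bits.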
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, + TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, + TD, Depth+1); + + unsigned Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + break; + } + + case Instruction::Alloca: { + AllocaInst *AI = cast<AllocaInst>(V); + unsigned Align = AI->getAlignment(); + if (Align == 0 && TD) + Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + break; + } + case Instruction::GetElementPtr: { + // Analyze all of the subscripts of this getelementptr instruction + // to determine if we can prove known low zero bits. + APInt LocalMask = APInt::getAllOnesValue(BitWidth); + APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + + gep_type_iterator GTI = gep_type_begin(I); + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + Value *Index = I->getOperand(i); + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + // Handle struct member offset arithmetic. + if (!TD) return; + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + uint64_t Offset = SL->getElementOffset(Idx); + TrailZ = std::min(TrailZ, + CountTrailingZeros_64(Offset)); + } else { + // Handle array index arithmetic. + const Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) return; + unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); + uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + LocalMask = APInt::getAllOnesValue(GEPOpiBits); + LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); + ComputeMaskedBits(Index, LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + TrailZ = std::min(TrailZ, + unsigned(CountTrailingZeros_64(TypeSize) + + LocalKnownZero.countTrailingOnes())); + } + } + + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + break; + } + case Instruction::PHI: { + PHINode *P = cast<PHINode>(I); + // Handle the case of a simple two-predecessor recurrence PHI. + // There's a lot more that could theoretically be done here, but + // this is sufficient to catch some interesting cases. + if (P->getNumIncomingValues() == 2) { + for (unsigned i = 0; i != 2; ++i) { + Value *L = P->getIncomingValue(i); + Value *R = P->getIncomingValue(!i); + Operator *LU = dyn_cast<Operator>(L); + if (!LU) + continue; + unsigned Opcode = LU->getOpcode(); + // Check for operations that have the property that if + // both their operands have low zero bits, the result + // will have low zero bits. + if (Opcode == Instruction::Add || + Opcode == Instruction::Sub || + Opcode == Instruction::And || + Opcode == Instruction::Or || + Opcode == Instruction::Mul) { + Value *LL = LU->getOperand(0); + Value *LR = LU->getOperand(1); + // Find a recurrence. + if (LL == I) + L = LR; + else if (LR == I) + L = LL; + else + break; + // Ok, we have a PHI of the form L op= R. Check for low + // zero bits. 
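+          // Editor's sketch: for the canonical induction variable
+          //   %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+          //   %i.next = add i32 %i, 4
+          // the step 4 contributes two known trailing zeros and the start
+          // value 0 at least as many, so the minimum taken below proves
+          // the low two bits of %i are zero, i.e. %i is a multiple of 4.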
+ APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + Mask2 = APInt::getLowBitsSet(BitWidth, + KnownZero2.countTrailingOnes()); + + // We need to take the minimum number of known bits + APInt KnownZero3(KnownZero), KnownOne3(KnownOne); + ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + + KnownZero = Mask & + APInt::getLowBitsSet(BitWidth, + std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + break; + } + } + } + + // Otherwise take the unions of the known bit sets of the operands, + // taking conservative care to avoid excessive recursion. + if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + // Skip direct self references. + if (P->getIncomingValue(i) == P) continue; + + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + // Recurse, but cap the recursion to one level, because we don't + // want to waste time spinning around in loops. + ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, + KnownZero2, KnownOne2, TD, MaxDepth-1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + // If all bits have been ruled out, there's no need to check + // more operands. + if (!KnownZero && !KnownOne) + break; + } + } + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + } + } + break; + } +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, + const TargetData *TD, unsigned Depth) { + APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + + + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "ashr X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +/// +/// 'Op' must have a scalar integer type. +/// +unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, + unsigned Depth) { + assert((TD || V->getType()->isIntOrIntVectorTy()) && + "ComputeNumSignBits requires a TargetData object to operate " + "on non-integer values!"); + const Type *Ty = V->getType(); + unsigned TyBits = TD ? 
TD->getTypeSizeInBits(V->getType()->getScalarType()) : + Ty->getScalarSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + // Note that ConstantInt is handled by the general ComputeMaskedBits case + // below. + + if (Depth == 6) + return 1; // Limit search depth. + + Operator *U = dyn_cast<Operator>(V); + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; + + case Instruction::AShr: + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + // ashr X, C -> adds C sign bits. + if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + return Tmp; + case Instruction::Shl: + if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (C->getZExtValue() >= TyBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case Instruction::Select: + Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); + return std::min(Tmp, Tmp2); + + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)) == Mask) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, + TD, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. 
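+        // Editor's note: e.g. for i32, 0 - 0 == 0 and 0 - 1 == -1
+        // (0xFFFFFFFF); both values are pure sign-bit replication, so all
+        // TyBits bits count as sign bits.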
+        if ((KnownZero | APInt(TyBits, 1)) == Mask)
+          return TyBits;
+
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero.isNegative())
+          return Tmp2;
+
+        // Otherwise, we treat this like a SUB.
+      }
+
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+
+  case Instruction::PHI: {
+    PHINode *PN = cast<PHINode>(U);
+    // Don't analyze large in-degree PHIs.
+    if (PN->getNumIncomingValues() > 4) break;
+
+    // Take the minimum of all incoming values.  This can't infinitely loop
+    // because of our depth threshold.
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (Tmp == 1) return Tmp;
+      Tmp = std::min(Tmp,
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+    }
+    return Tmp;
+  }
+
+  case Instruction::Trunc:
+    // FIXME: it's tricky to do anything useful for this, but it is an
+    // important case for targets like X86.
+    break;
+  }
+
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+  APInt Mask = APInt::getAllOnesValue(TyBits);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+  if (KnownZero.isNegative()) {        // sign bit is 0
+    Mask = KnownZero;
+  } else if (KnownOne.isNegative()) {  // sign bit is 1
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return FirstAnswer;
+  }
+
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask = ~Mask;
+  Mask <<= Mask.getBitWidth()-TyBits;
+  // Return # leading zeros.  We use 'min' here in case Val was zero before
+  // shifting.  We don't want to return '64' for an i32 "0".
+  return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer multiple of Base that
+/// equals V.  If successful, it returns true and returns the multiple in
+/// Multiple.  If unsuccessful, it returns false.  It looks through SExt
+/// instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+                           bool LookThroughSExt, unsigned Depth) {
+  const unsigned MaxDepth = 6;
+
+  assert(V && "No Value?");
+  assert(Depth <= MaxDepth && "Limit Search Depth");
+  assert(V->getType()->isIntegerTy() && "Not integer type!");
+
+  const Type *T = V->getType();
+
+  ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+  if (Base == 0)
+    return false;
+
+  if (Base == 1) {
+    Multiple = V;
+    return true;
+  }
+
+  ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+  Constant *BaseVal = ConstantInt::get(T, Base);
+  if (CO && CO == BaseVal) {
+    // Multiple is 1.
+    Multiple = ConstantInt::get(T, 1);
+    return true;
+  }
+
+  if (CI && CI->getZExtValue() % Base == 0) {
+    Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+    return true;
+  }
+
+  if (Depth == MaxDepth) return false;  // Limit search depth.
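+  // Editor's sketch of the recursion below (with a hypothetical %x): for
+  // V == shl i32 %x, 3 and Base == 8, the shift is rewritten as %x * 8;
+  // the constant operand 8 is itself a multiple of Base with quotient 1,
+  // so Multiple is set to %x.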
+
+  Operator *I = dyn_cast<Operator>(V);
+  if (!I) return false;
+
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::SExt:
+    if (!LookThroughSExt) return false;
+    // otherwise fall through to ZExt
+  case Instruction::ZExt:
+    return ComputeMultiple(I->getOperand(0), Base, Multiple,
+                           LookThroughSExt, Depth+1);
+  case Instruction::Shl:
+  case Instruction::Mul: {
+    Value *Op0 = I->getOperand(0);
+    Value *Op1 = I->getOperand(1);
+
+    if (I->getOpcode() == Instruction::Shl) {
+      ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+      if (!Op1CI) return false;
+      // Turn Op0 << Op1 into Op0 * 2^Op1
+      APInt Op1Int = Op1CI->getValue();
+      uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+      Op1 = ConstantInt::get(V->getContext(),
+                             APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+    }
+
+    Value *Mul0 = NULL;
+    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+      if (Constant *Op1C = dyn_cast<Constant>(Op1))
+        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+          if (Op1C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+          if (Op1C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+          Multiple = ConstantExpr::getMul(MulC, Op1C);
+          return true;
+        }
+
+      if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+        if (Mul0CI->getValue() == 1) {
+          // V == Base * Op1, so return Op1
+          Multiple = Op1;
+          return true;
+        }
+    }
+
+    Value *Mul1 = NULL;
+    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+      if (Constant *Op0C = dyn_cast<Constant>(Op0))
+        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+          if (Op0C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+          if (Op0C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+          Multiple = ConstantExpr::getMul(MulC, Op0C);
+          return true;
+        }
+
+      if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+        if (Mul1CI->getValue() == 1) {
+          // V == Base * Op0, so return Op0
+          Multiple = Op0;
+          return true;
+        }
+    }
+  }
+  }
+
+  // We could not determine if V is a multiple of Base.
+  return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+    return !CFP->getValueAPF().isNegZero();
+
+  if (Depth == 6)
+    return false;  // Limit search depth; nothing further can be proven.
+
+  const Operator *I = dyn_cast<Operator>(V);
+  if (I == 0) return false;
+
+  // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+  if (I->getOpcode() == Instruction::FAdd &&
+      isa<ConstantFP>(I->getOperand(1)) &&
+      cast<ConstantFP>(I->getOperand(1))->isNullValue())
+    return true;
+
+  // sitofp and uitofp turn into +0.0 for zero.
+  if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+    return true;
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    // sqrt(-0.0) = -0.0, no other negative results are possible.
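+    // Editor's note: under IEEE 754, sqrt returns -0.0 only for a -0.0
+    // input, so the query can simply be forwarded to the operand.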
+    if (II->getIntrinsicID() == Intrinsic::sqrt)
+      return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (const Function *F = CI->getCalledFunction()) {
+      if (F->isDeclaration()) {
+        // abs(x) != -0.0
+        if (F->getName() == "abs") return true;
+        // fabs[lf](x) != -0.0
+        if (F->getName() == "fabs") return true;
+        if (F->getName() == "fabsf") return true;
+        if (F->getName() == "fabsl") return true;
+        if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+            F->getName() == "sqrtl")
+          return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+      }
+    }
+
+  return false;
+}
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far; new insertvalue instructions
+// build on it.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+                                SmallVector<unsigned, 10> &Idxs,
+                                unsigned IdxSkip,
+                                Instruction *InsertBefore) {
+  const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+  if (STy) {
+    // Save the original To argument so we can modify it
+    Value *OrigTo = To;
+    // General case, the type indexed by Idxs is a struct
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      // Process each struct element recursively
+      Idxs.push_back(i);
+      Value *PrevTo = To;
+      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+                             InsertBefore);
+      Idxs.pop_back();
+      if (!To) {
+        // Couldn't find any inserted value for this index? Cleanup
+        while (PrevTo != OrigTo) {
+          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+          PrevTo = Del->getAggregateOperand();
+          Del->eraseFromParent();
+        }
+        // Stop processing elements
+        break;
+      }
+    }
+    // If we successfully found a value for each of our subaggregates
+    if (To)
+      return To;
+  }
+  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
+  // the struct's elements had a value that was inserted directly. In the latter
+  // case, perhaps we can't determine each of the subelements individually, but
+  // we might be able to find the complete struct somewhere.
+
+  // Find the value that is at that particular spot
+  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+
+  if (!V)
+    return NULL;
+
+  // Insert the value in the new (sub) aggregate
+  return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
+                                       Idxs.end(), "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (i.e., inserted into From by
+// an insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+                                const unsigned *idx_end,
+                                Instruction *InsertBefore) {
+  assert(InsertBefore && "Must have someplace to insert!");
+  const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+                                                             idx_begin,
+                                                             idx_end);
+  Value *To = UndefValue::get(IndexedType);
+  SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+  unsigned IdxSkip = Idxs.size();
+
+  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
+                         const unsigned *idx_end, Instruction *InsertBefore) {
+  // Nothing to index? Just return V then (this is useful at the end of our
+  // recursion).
+  if (idx_begin == idx_end)
+    return V;
+  // We have indices, so V should have an indexable type.
+  assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+         && "Not looking at a struct or array?");
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+         && "Invalid indices for type?");
+  const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+  if (isa<UndefValue>(V))
+    return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+                                                            idx_begin,
+                                                            idx_end));
+  else if (isa<ConstantAggregateZero>(V))
+    return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+                                                                   idx_begin,
+                                                                   idx_end));
+  else if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+      // Recursively process this constant.
+      return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+                               idx_end, InsertBefore);
+  } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+    // Loop over the indices of the insertvalue instruction in parallel with
+    // the requested indices.
+    const unsigned *req_idx = idx_begin;
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i, ++req_idx) {
+      if (req_idx == idx_end) {
+        if (InsertBefore)
+          // The requested index identifies a part of a nested aggregate. Handle
+          // this specially. For example,
+          // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+          // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+          // %C = extractvalue {i32, { i32, i32 } } %B, 1
+          // This can be changed into
+          // %A = insertvalue {i32, i32 } undef, i32 10, 0
+          // %C = insertvalue {i32, i32 } %A, i32 11, 1
+          // which allows the unused 0,0 element from the nested struct to be
+          // removed.
+          return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+        else
+          // We can't handle this without inserting insertvalues.
+          return 0;
+      }
+
+      // This insertvalue inserts something other than what we are looking
+      // for. See if the (aggregate) value inserted into has the value we are
+      // looking for, then.
+      if (*req_idx != *i)
+        return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+                                 InsertBefore);
+    }
+    // If we end up here, the indices of the insertvalue match with those
+    // requested (though possibly only partially). Now we recursively look at
+    // the inserted value, passing any remaining indices.
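+    // Editor's example: given
+    //   %I = insertvalue {i32, {i32, i32}} %agg, {i32, i32} %v, 1
+    // and requested indices 1, 0, the single index of %I matches the first
+    // requested index, so we recurse into %v with the remaining index 0.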
+    return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+                             InsertBefore);
+  } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+    // something else, we can extract from that something else directly instead.
+    // However, we will need to chain I's indices with the requested indices.
+
+    // Calculate the number of indices required.
+    unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+    // Allocate some space to put the new indices in.
+    SmallVector<unsigned, 5> Idxs;
+    Idxs.reserve(size);
+    // Add indices from the extractvalue instruction.
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i)
+      Idxs.push_back(*i);
+
+    // Add the requested indices.
+    for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
+      Idxs.push_back(*i);
+
+    assert(Idxs.size() == size
+           && "Number of indices added not correct?");
+
+    return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
+                             InsertBefore);
+  }
+  // Otherwise, we don't know (such as extracting from a function return value
+  // or a load instruction).
+  return 0;
+}
+
+/// GetConstantStringInfo - This function computes the contents of a
+/// null-terminated C string pointed to by V.  If successful, it returns true
+/// and returns the string in Str.  If unsuccessful, it returns false.
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+                                 uint64_t Offset,
+                                 bool StopAtNul) {
+  // If V is NULL, return false.
+  if (V == NULL) return false;
+
+  // Look through bitcast instructions.
+  if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return false because ConstantArray can't occur
+  // any other way.
+  const User *GEP = 0;
+  if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() == Instruction::BitCast)
+      return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return false;
+    GEP = CE;
+  }
+
+  if (GEP) {
+    // Make sure the GEP has exactly three arguments.
+    if (GEP->getNumOperands() != 3)
+      return false;
+
+    // Make sure the index-ee is a pointer to array of i8.
+    const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+    const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+    if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+      return false;
+
+    // Check to make sure that the first operand of the GEP is an integer and
+    // has value 0 so that we are sure we're indexing into the initializer.
+    const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    if (FirstIdx == 0 || !FirstIdx->isZero())
+      return false;
+
+    // If the second index isn't a ConstantInt, then this is a variable index
+    // into the array.  If this occurs, we can't say anything meaningful about
+    // the string.
+    uint64_t StartIdx = 0;
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+      StartIdx = CI->getZExtValue();
+    else
+      return false;
+    return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+                                 StopAtNul);
+  }
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized.  The referenced constant
+  // initializer is the array that we'll use for optimization.
+  const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+  const Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case.
+  if (isa<ConstantAggregateZero>(GlobalInit)) {
+    // This is a degenerate case. The initializer is constant zero so the
+    // length of the string must be zero.
+    Str.clear();
+    return true;
+  }
+
+  // Must be a ConstantArray.
+  const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+    return false;
+
+  // Get the number of elements in the array.
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  if (Offset > NumElts)
+    return false;
+
+  // Traverse the constant array from 'Offset', which is the place the GEP
+  // refers to in the array.
+  Str.reserve(NumElts-Offset);
+  for (unsigned i = Offset; i != NumElts; ++i) {
+    const Constant *Elt = Array->getOperand(i);
+    const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return false;
+    if (StopAtNul && CI->isZero())
+      return true; // we found end of string, success!
+    Str += (char)CI->getZExtValue();
+  }
+
+  // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+  return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+  // Look through noop bitcast instructions.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+  // If this is a PHI node, there are two cases: either we have already seen it
+  // or we haven't.
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (!PHIs.insert(PN))
+      return ~0ULL;  // already in the set.
+
+    // If it was new, see if all the input strings are the same length.
+    uint64_t LenSoFar = ~0ULL;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+      if (Len == 0) return 0; // Unknown length -> unknown.
+
+      if (Len == ~0ULL) continue;
+
+      if (Len != LenSoFar && LenSoFar != ~0ULL)
+        return 0;    // Disagree -> unknown.
+      LenSoFar = Len;
+    }
+
+    // Success, all agree.
+    return LenSoFar;
+  }
+
+  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+    if (Len1 == 0) return 0;
+    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+    if (Len2 == 0) return 0;
+    if (Len1 == ~0ULL) return Len2;
+    if (Len2 == ~0ULL) return Len1;
+    if (Len1 != Len2) return 0;
+    return Len1;
+  }
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return unknown.
+  User *GEP = 0;
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return 0;
+    GEP = CE;
+  } else {
+    return 0;
+  }
+
+  // Make sure the GEP has exactly three arguments.
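+  // Editor's note: the shape being matched is the classic string access
+  //   getelementptr [n x i8]* @str, i64 0, i64 StartIdx
+  // (hypothetical @str): a zero first index into the global plus a
+  // constant offset into the array, which the checks below enforce.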
+  if (GEP->getNumOperands() != 3)
+    return 0;
+
+  // Check to make sure that the first operand of the GEP is an integer and
+  // has value 0 so that we are sure we're indexing into the initializer.
+  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+    if (!Idx->isZero())
+      return 0;
+  } else
+    return 0;
+
+  // If the second index isn't a ConstantInt, then this is a variable index
+  // into the array.  If this occurs, we can't say anything meaningful about
+  // the string.
+  uint64_t StartIdx = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    StartIdx = CI->getZExtValue();
+  else
+    return 0;
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized.  The referenced constant
+  // initializer is the array that we'll use for optimization.
+  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
+    return 0;
+  Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case, which is a degenerate case. The
+  // initializer is constant zero so the length of the string must be zero.
+  if (isa<ConstantAggregateZero>(GlobalInit))
+    return 1;  // Len = 0 offset by 1.
+
+  // Must be a ConstantArray.
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+  // Get the number of elements in the array.
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  // Traverse the constant array from StartIdx (derived above), which is
+  // the place the GEP refers to in the array.
+  for (unsigned i = StartIdx; i != NumElts; ++i) {
+    Constant *Elt = Array->getOperand(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return 0;
+    if (CI->isZero())
+      return i-StartIdx+1; // We found end of string, success!
+  }
+
+  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;
+
+  SmallPtrSet<PHINode*, 32> PHIs;
+  uint64_t Len = GetStringLengthH(V, PHIs);
+  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so
+  // return 1, the length of an empty string.
+  return Len == ~0ULL ? 1 : Len;
+}