Diffstat (limited to 'contrib/llvm/lib/Analysis')
62 files changed, 34382 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp new file mode 100644 index 0000000..c189a00 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -0,0 +1,402 @@ +//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic AliasAnalysis interface which is used as the +// common interface used by all clients and implementations of alias analysis. +// +// This file also implements the default version of the AliasAnalysis interface +// that is to be used when no other implementation is specified. This does some +// simple tests that detect obvious cases: two different global pointers cannot +// alias, a global cannot alias a malloc, two different mallocs cannot alias, +// etc. +// +// This alias analysis implementation really isn't very good for anything, but +// it is very fast, and makes a nice clean default implementation. Because it +// handles lots of little corner cases, other, more complex, alias analysis +// implementations may choose to rely on this pass to resolve these simple and +// easy cases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/BasicBlock.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +// Register the AliasAnalysis interface, providing a nice name to refer to. 
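The header comment above describes the chaining design: each AliasAnalysis in the analysis group answers what it can and forwards the rest to the next implementation in the chain. A minimal consumer of that interface, sketched below against the API visible in this file (the pass name AAClient and its counter are illustrative, not part of the import), requires the group and issues pairwise alias queries over a function's pointer arguments:

#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Illustrative AliasAnalysis consumer; pass registration omitted for brevity.
  struct AAClient : public FunctionPass {
    static char ID;
    AAClient() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<AliasAnalysis>();  // join whatever AA chain is scheduled
      AU.setPreservesAll();
    }

    virtual bool runOnFunction(Function &F) {
      AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
      unsigned NumNoAlias = 0;
      for (Function::arg_iterator A = F.arg_begin(), E = F.arg_end(); A != E; ++A)
        for (Function::arg_iterator B = A; ++B != E; )
          if (A->getType()->isPointerTy() && B->getType()->isPointerTy() &&
              AA.alias(AliasAnalysis::Location(&*A),
                       AliasAnalysis::Location(&*B)) == AliasAnalysis::NoAlias)
            ++NumNoAlias;
      errs() << F.getName() << ": " << NumNoAlias << " no-alias argument pairs\n";
      return false;
    }
  };
}
char AAClient::ID = 0;

AliasAnalysis::Location defaults its size to UnknownSize and its TBAA tag to null, so the queries above are deliberately conservative.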
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) +char AliasAnalysis::ID = 0; + +//===----------------------------------------------------------------------===// +// Default chaining methods +//===----------------------------------------------------------------------===// + +AliasAnalysis::AliasResult +AliasAnalysis::alias(const Location &LocA, const Location &LocB) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->alias(LocA, LocB); +} + +bool AliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->pointsToConstantMemory(Loc, OrLocal); +} + +void AliasAnalysis::deleteValue(Value *V) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->deleteValue(V); +} + +void AliasAnalysis::copyValue(Value *From, Value *To) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->copyValue(From, To); +} + +void AliasAnalysis::addEscapingUse(Use &U) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + AA->addEscapingUse(U); +} + + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + + ModRefBehavior MRB = getModRefBehavior(CS); + if (MRB == DoesNotAccessMemory) + return NoModRef; + + ModRefResult Mask = ModRef; + if (onlyReadsMemory(MRB)) + Mask = Ref; + + if (onlyAccessesArgPointees(MRB)) { + bool doesAlias = false; + if (doesAccessArgPointees(MRB)) { + MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + Location CSLoc(Arg, UnknownSize, CSTag); + if (!isNoAlias(CSLoc, Loc)) { + doesAlias = true; + break; + } + } + } + if (!doesAlias) + return NoModRef; + } + + // If Loc is a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & Mod) && pointsToConstantMemory(Loc)) + Mask = ModRefResult(Mask & ~Mod); + + // If this is the end of the chain, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + + // If CS1 or CS2 are readnone, they don't interact. + ModRefBehavior CS1B = getModRefBehavior(CS1); + if (CS1B == DoesNotAccessMemory) return NoModRef; + + ModRefBehavior CS2B = getModRefBehavior(CS2); + if (CS2B == DoesNotAccessMemory) return NoModRef; + + // If they both only read from memory, there is no dependence. + if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) + return NoModRef; + + AliasAnalysis::ModRefResult Mask = ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. + if (onlyReadsMemory(CS1B)) + Mask = ModRefResult(Mask & Ref); + + // If CS2 only access memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. 
+ if (onlyAccessesArgPointees(CS2B)) { + AliasAnalysis::ModRefResult R = NoModRef; + if (doesAccessArgPointees(CS2B)) { + MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator + I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS2Loc(Arg, UnknownSize, CS2Tag); + R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); + if (R == Mask) + break; + } + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (onlyAccessesArgPointees(CS1B)) { + AliasAnalysis::ModRefResult R = NoModRef; + if (doesAccessArgPointees(CS1B)) { + MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); + for (ImmutableCallSite::arg_iterator + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS1Loc(Arg, UnknownSize, CS1Tag); + if (getModRefInfo(CS2, CS1Loc) != NoModRef) { + R = Mask; + break; + } + } + } + if (R == NoModRef) + return R; + } + + // If this is the end of the chain, don't forward. + if (!AA) return Mask; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any mask we've managed to compute. + return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + + ModRefBehavior Min = UnknownModRefBehavior; + + // Call back into the alias analysis with the other form of getModRefBehavior + // to see if it can give a better response. + if (const Function *F = CS.getCalledFunction()) + Min = getModRefBehavior(F); + + // If this is the end of the chain, don't forward. + if (!AA) return Min; + + // Otherwise, fall back to the next AA in the chain. But we can merge + // in any result we've managed to compute. + return ModRefBehavior(AA->getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +AliasAnalysis::getModRefBehavior(const Function *F) { + assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); + return AA->getModRefBehavior(F); +} + +//===----------------------------------------------------------------------===// +// AliasAnalysis non-virtual helper method implementation +//===----------------------------------------------------------------------===// + +AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { + return Location(LI->getPointerOperand(), + getTypeStoreSize(LI->getType()), + LI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { + return Location(SI->getPointerOperand(), + getTypeStoreSize(SI->getValueOperand()->getType()), + SI->getMetadata(LLVMContext::MD_tbaa)); +} + +AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { + return Location(VI->getPointerOperand(), + UnknownSize, + VI->getMetadata(LLVMContext::MD_tbaa)); +} + + +AliasAnalysis::Location +AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have TBAA tags. For memcpy, they apply + // to both the source and the destination. 
+ MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); + + return Location(MTI->getRawSource(), Size, TBAATag); +} + +AliasAnalysis::Location +AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { + uint64_t Size = UnknownSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength())) + Size = C->getValue().getZExtValue(); + + // memcpy/memmove can have TBAA tags. For memcpy, they apply + // to both the source and the destination. + MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); + + return Location(MTI->getRawDest(), Size, TBAATag); +} + + + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { + // Be conservative in the face of volatile. + if (L->isVolatile()) + return ModRef; + + // If the load address doesn't alias the given address, it doesn't read + // or write the specified memory. + if (!alias(getLocation(L), Loc)) + return NoModRef; + + // Otherwise, a load just reads. + return Ref; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { + // Be conservative in the face of volatile. + if (S->isVolatile()) + return ModRef; + + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(getLocation(S), Loc)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + // Otherwise, a store just writes. + return Mod; +} + +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) { + // If the va_arg address cannot alias the pointer in question, then the + // specified memory cannot be accessed by the va_arg. + if (!alias(getLocation(V), Loc)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have been + // modified by this va_arg. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + // Otherwise, a va_arg reads and writes. + return ModRef; +} + +// AliasAnalysis destructor: DO NOT move this to the header file for +// AliasAnalysis or else clients of the AliasAnalysis class may not depend on +// the AliasAnalysis.o file in the current .a file, causing alias analysis +// support to not be included in the tool correctly! +// +AliasAnalysis::~AliasAnalysis() {} + +/// InitializeAliasAnalysis - Subclasses must call this method to initialize the +/// AliasAnalysis interface before any other methods are called. +/// +void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { + TD = P->getAnalysisIfAvailable<TargetData>(); + AA = &P->getAnalysis<AliasAnalysis>(); +} + +// getAnalysisUsage - All alias analysis implementations should invoke this +// directly (using AliasAnalysis::getAnalysisUsage(AU)). +void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); // All AA's chain +} + +/// getTypeStoreSize - Return the TargetData store size for the given type, +/// if known, or a conservative value otherwise. +/// +uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) { + return TD ? TD->getTypeStoreSize(Ty) : UnknownSize; +} + +/// canBasicBlockModify - Return true if it is possible for execution of the +/// specified basic block to modify the value pointed to by Ptr. 
+/// +bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, + const Location &Loc) { + return canInstructionRangeModify(BB.front(), BB.back(), Loc); +} + +/// canInstructionRangeModify - Return true if it is possible for the execution +/// of the specified instructions to modify the value pointed to by Ptr. The +/// instructions to consider are all of the instructions in the range of [I1,I2] +/// INCLUSIVE. I1 and I2 must be in the same basic block. +/// +bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1, + const Instruction &I2, + const Location &Loc) { + assert(I1.getParent() == I2.getParent() && + "Instructions not in same basic block!"); + BasicBlock::const_iterator I = &I1; + BasicBlock::const_iterator E = &I2; + ++E; // Convert from inclusive to exclusive range. + + for (; I != E; ++I) // Check every instruction in range + if (getModRefInfo(I, Loc) & Mod) + return true; + return false; +} + +/// isNoAliasCall - Return true if this pointer is returned by a noalias +/// function. +bool llvm::isNoAliasCall(const Value *V) { + if (isa<CallInst>(V) || isa<InvokeInst>(V)) + return ImmutableCallSite(cast<Instruction>(V)) + .paramHasAttr(0, Attribute::NoAlias); + return false; +} + +/// isIdentifiedObject - Return true if this pointer refers to a distinct and +/// identifiable object. This returns true for: +/// Global Variables and Functions (but not Global Aliases) +/// Allocas and Mallocs +/// ByVal and NoAlias Arguments +/// NoAlias returns +/// +bool llvm::isIdentifiedObject(const Value *V) { + if (isa<AllocaInst>(V)) + return true; + if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V)) + return true; + if (isNoAliasCall(V)) + return true; + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasNoAliasAttr() || A->hasByValAttr(); + return false; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp new file mode 100644 index 0000000..d947220 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp @@ -0,0 +1,173 @@ +//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass which can be used to count how many alias queries +// are being made and how the alias analysis implementation being used responds. 
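AliasAnalysisCounter is scheduled like any other member of the alias-analysis group: it sits on top of a real implementation, forwards every query, and tallies the answers, printing its report when the pass is destroyed. A hedged sketch of the wiring (PassManager and createBasicAliasAnalysisPass come from elsewhere in the tree and are assumptions here; the counter and evaluator factories are declared in this directory):

#include "llvm/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
using namespace llvm;

// Sketch: count every AA query made by the aa-eval consumer over a module.
static void runCountedQueries(Module &M) {
  PassManager PM;
  PM.add(createBasicAliasAnalysisPass());   // underlying implementation (assumed)
  PM.add(createAliasAnalysisCounterPass()); // wraps it and counts responses
  PM.add(createAAEvalPass());               // any AA consumer would do
  PM.run(M);
  // The counter's report is emitted from its destructor when PM is torn down.
}

The pass names registered in this file ("count-aa", "aa-eval") expose the same chain through opt.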
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static cl::opt<bool> +PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); +static cl::opt<bool> +PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); + +namespace { + class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { + unsigned No, May, Partial, Must; + unsigned NoMR, JustRef, JustMod, MR; + Module *M; + public: + static char ID; // Class identification, replacement for typeinfo + AliasAnalysisCounter() : ModulePass(ID) { + initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); + No = May = Partial = Must = 0; + NoMR = JustRef = JustMod = MR = 0; + } + + void printLine(const char *Desc, unsigned Val, unsigned Sum) { + errs() << " " << Val << " " << Desc << " responses (" + << Val*100/Sum << "%)\n"; + } + ~AliasAnalysisCounter() { + unsigned AASum = No+May+Partial+Must; + unsigned MRSum = NoMR+JustRef+JustMod+MR; + if (AASum + MRSum) { // Print a report if any counted queries occurred... + errs() << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted:\n" + << " " << AASum << " Total Alias Queries Performed\n"; + if (AASum) { + printLine("no alias", No, AASum); + printLine("may alias", May, AASum); + printLine("partial alias", Partial, AASum); + printLine("must alias", Must, AASum); + errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" + << Partial*100/AASum << "%/" + << Must*100/AASum<<"%\n\n"; + } + + errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + if (MRSum) { + printLine("no mod/ref", NoMR, MRSum); + printLine("ref", JustRef, MRSum); + printLine("mod", JustMod, MRSum); + printLine("mod/ref", MR, MRSum); + errs() << " Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum + << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum + << "%/" << MR*100/MRSum <<"%\n\n"; + } + } + } + + bool runOnModule(Module &M) { + this->M = &M; + InitializeAliasAnalysis(this); + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + // FIXME: We could count these too... + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal); + } + + // Forwarding functions: just delegate to a real AA implementation, counting + // the number of responses... 
+ AliasResult alias(const Location &LocA, const Location &LocB); + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + }; +} + +char AliasAnalysisCounter::ID = 0; +INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", + "Count Alias Analysis Query Responses", false, true, false) + +ModulePass *llvm::createAliasAnalysisCounterPass() { + return new AliasAnalysisCounter(); +} + +AliasAnalysis::AliasResult +AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { + AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); + + const char *AliasString; + switch (R) { + default: llvm_unreachable("Unknown alias type!"); + case NoAlias: No++; AliasString = "No alias"; break; + case MayAlias: May++; AliasString = "May alias"; break; + case PartialAlias: Partial++; AliasString = "Partial alias"; break; + case MustAlias: Must++; AliasString = "Must alias"; break; + } + + if (PrintAll || (PrintAllFailures && R == MayAlias)) { + errs() << AliasString << ":\t"; + errs() << "[" << LocA.Size << "B] "; + WriteAsOperand(errs(), LocA.Ptr, true, M); + errs() << ", "; + errs() << "[" << LocB.Size << "B] "; + WriteAsOperand(errs(), LocB.Ptr, true, M); + errs() << "\n"; + } + + return R; +} + +AliasAnalysis::ModRefResult +AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); + + const char *MRString; + switch (R) { + default: llvm_unreachable("Unknown mod/ref type!"); + case NoModRef: NoMR++; MRString = "NoModRef"; break; + case Ref: JustRef++; MRString = "JustRef"; break; + case Mod: JustMod++; MRString = "JustMod"; break; + case ModRef: MR++; MRString = "ModRef"; break; + } + + if (PrintAll || (PrintAllFailures && R == ModRef)) { + errs() << MRString << ": Ptr: "; + errs() << "[" << Loc.Size << "B] "; + WriteAsOperand(errs(), Loc.Ptr, true, M); + errs() << "\t<->" << *CS.getInstruction() << '\n'; + } + return R; +} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp new file mode 100644 index 0000000..1afc1b7 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -0,0 +1,304 @@ +//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple N^2 alias analysis accuracy evaluator. +// Basically, for each function in the program, it simply queries to see how the +// alias analysis implementation answers alias queries between each pair of +// pointers in the function. +// +// This is inspired and adapted from code by: Naveen Neelakantam, Francesco +// Spadini, and Wojciech Stryjewski. 
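The evaluator's per-pair query, implemented in the loops further down, boils down to deriving a conservative size for each pointer from its pointee type and asking the scheduled AliasAnalysis. Condensed into a standalone helper for clarity (the helper name is ours, not part of the diff):

#include "llvm/DerivedTypes.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// One aa-eval style query: the size is the pointee's store size if it is
// sized, otherwise UnknownSize.
static AliasAnalysis::AliasResult
queryPair(AliasAnalysis &AA, const Value *P1, const Value *P2) {
  uint64_t S1 = AliasAnalysis::UnknownSize, S2 = AliasAnalysis::UnknownSize;
  const Type *T1 = cast<PointerType>(P1->getType())->getElementType();
  const Type *T2 = cast<PointerType>(P2->getType())->getElementType();
  if (T1->isSized()) S1 = AA.getTypeStoreSize(T1);
  if (T2->isSized()) S2 = AA.getTypeStoreSize(T2);
  return AA.alias(P1, S1, P2, S2);
}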
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +using namespace llvm; + +static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden); + +static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden); +static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden); + +static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden); +static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden); +static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden); +static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden); + +namespace { + class AAEval : public FunctionPass { + unsigned NoAlias, MayAlias, PartialAlias, MustAlias; + unsigned NoModRef, Mod, Ref, ModRef; + + public: + static char ID; // Pass identification, replacement for typeid + AAEval() : FunctionPass(ID) { + initializeAAEvalPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + AU.setPreservesAll(); + } + + bool doInitialization(Module &M) { + NoAlias = MayAlias = PartialAlias = MustAlias = 0; + NoModRef = Mod = Ref = ModRef = 0; + + if (PrintAll) { + PrintNoAlias = PrintMayAlias = true; + PrintPartialAlias = PrintMustAlias = true; + PrintNoModRef = PrintMod = PrintRef = PrintModRef = true; + } + return false; + } + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; +} + +char AAEval::ID = 0; +INITIALIZE_PASS_BEGIN(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(AAEval, "aa-eval", + "Exhaustive Alias Analysis Precision Evaluator", false, true) + +FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } + +static void PrintResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + std::string o1, o2; + { + raw_string_ostream os1(o1), os2(o2); + WriteAsOperand(os1, V1, true, M); + WriteAsOperand(os2, V2, true, M); + } + + if (o2 < o1) + std::swap(o1, o2); + errs() << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, + Module *M) { + if (P) { + errs() << " " << Msg << ": Ptr: "; + WriteAsOperand(errs(), Ptr, true, M); + errs() << "\t<->" << *I << '\n'; + } +} + +static inline void +PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, + Module *M) { + if (P) { + errs() << " " << Msg << ": " << *CSA.getInstruction() + << " <-> " << *CSB.getInstruction() << '\n'; + } +} + +static inline bool isInterestingPointer(Value *V) { + return V->getType()->isPointerTy() + && !isa<ConstantPointerNull>(V); +} + +bool AAEval::runOnFunction(Function &F) { + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + + SetVector<Value *> Pointers; + SetVector<CallSite> CallSites; + + for 
(Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) + if (I->getType()->isPointerTy()) // Add all pointer arguments. + Pointers.insert(I); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->getType()->isPointerTy()) // Add all pointer instructions. + Pointers.insert(&*I); + Instruction &Inst = *I; + if (CallSite CS = cast<Value>(&Inst)) { + Value *Callee = CS.getCalledValue(); + // Skip actual functions for direct function calls. + if (!isa<Function>(Callee) && isInterestingPointer(Callee)) + Pointers.insert(Callee); + // Consider formals. + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) + if (isInterestingPointer(*AI)) + Pointers.insert(*AI); + CallSites.insert(CS); + } else { + // Consider all operands. + for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end(); + OI != OE; ++OI) + if (isInterestingPointer(*OI)) + Pointers.insert(*OI); + } + } + + if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias || + PrintNoModRef || PrintMod || PrintRef || PrintModRef) + errs() << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; + + // iterate over the worklist, and run the full (n^2)/2 disambiguations + for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end(); + I1 != E; ++I1) { + uint64_t I1Size = AliasAnalysis::UnknownSize; + const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); + if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); + + for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + uint64_t I2Size = AliasAnalysis::UnknownSize; + const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); + if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); + + switch (AA.alias(*I1, I1Size, *I2, I2Size)) { + case AliasAnalysis::NoAlias: + PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); + ++MustAlias; break; + default: + errs() << "Unknown alias query result!\n"; + } + } + } + + // Mod/ref alias analysis: compare all pairs of calls and values + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + Instruction *I = C->getInstruction(); + + for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end(); + V != Ve; ++V) { + uint64_t Size = AliasAnalysis::UnknownSize; + const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); + if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); + + switch (AA.getModRefInfo(*C, *V, Size)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent()); + ++ModRef; break; + default: + errs() << "Unknown alias query result!\n"; + } + } + } + + // Mod/ref 
alias analysis: compare all pairs of calls + for (SetVector<CallSite>::iterator C = CallSites.begin(), + Ce = CallSites.end(); C != Ce; ++C) { + for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { + if (D == C) + continue; + switch (AA.getModRefInfo(*C, *D)) { + case AliasAnalysis::NoModRef: + PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent()); + ++NoModRef; break; + case AliasAnalysis::Mod: + PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent()); + ++Mod; break; + case AliasAnalysis::Ref: + PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent()); + ++Ref; break; + case AliasAnalysis::ModRef: + PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); + ++ModRef; break; + } + } + } + + return false; +} + +static void PrintPercent(unsigned Num, unsigned Sum) { + errs() << "(" << Num*100ULL/Sum << "." + << ((Num*1000ULL/Sum) % 10) << "%)\n"; +} + +bool AAEval::doFinalization(Module &M) { + unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias; + errs() << "===== Alias Analysis Evaluator Report =====\n"; + if (AliasSum == 0) { + errs() << " Alias Analysis Evaluator Summary: No pointers!\n"; + } else { + errs() << " " << AliasSum << " Total Alias Queries Performed\n"; + errs() << " " << NoAlias << " no alias responses "; + PrintPercent(NoAlias, AliasSum); + errs() << " " << MayAlias << " may alias responses "; + PrintPercent(MayAlias, AliasSum); + errs() << " " << PartialAlias << " partial alias responses "; + PrintPercent(PartialAlias, AliasSum); + errs() << " " << MustAlias << " must alias responses "; + PrintPercent(MustAlias, AliasSum); + errs() << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << PartialAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; + } + + // Display the summary for mod/ref analysis + unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; + if (ModRefSum == 0) { + errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + } else { + errs() << " " << ModRefSum << " Total ModRef Queries Performed\n"; + errs() << " " << NoModRef << " no mod/ref responses "; + PrintPercent(NoModRef, ModRefSum); + errs() << " " << Mod << " mod responses "; + PrintPercent(Mod, ModRefSum); + errs() << " " << Ref << " ref responses "; + PrintPercent(Ref, ModRefSum); + errs() << " " << ModRef << " mod & ref responses "; + PrintPercent(ModRef, ModRefSum); + errs() << " Alias Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp new file mode 100644 index 0000000..f15c051 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp @@ -0,0 +1,138 @@ +//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass checks alias analysis users to ensure that if they +// create a new value, they do not query AA without informing it of the value. +// It acts as a shim over any other AA pass you want. 
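The protocol this checker enforces is the deleteValue/copyValue bookkeeping on AliasAnalysis: a transform that clones or erases values must tell the AA stack, or debug-aa's asserts fire on the next query. A hedged fragment of what a well-behaved transform looks like (function and variable names are illustrative):

#include "llvm/Instruction.h"
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Keep the AA chain (and debug-aa) informed while cloning and erasing.
static void duplicateAndRetire(AliasAnalysis &AA, Instruction *Orig,
                               Instruction *Dead) {
  Instruction *Clone = Orig->clone();
  Clone->insertAfter(Orig);
  AA.copyValue(Orig, Clone);   // new value: announce it before querying it

  AA.deleteValue(Dead);        // value going away: announce it first
  Dead->eraseFromParent();
}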
+// +// Yes keeping track of every value in the program is expensive, but this is +// a debugging pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include <set> +using namespace llvm; + +namespace { + + class AliasDebugger : public ModulePass, public AliasAnalysis { + + //What we do is simple. Keep track of every value the AA could + //know about, and verify that queries are one of those. + //A query to a value that didn't exist when the AA was created + //means someone forgot to update the AA when creating new values + + std::set<const Value*> Vals; + + public: + static char ID; // Class identification, replacement for typeinfo + AliasDebugger() : ModulePass(ID) { + initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + + for(Module::global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + Vals.insert(&*I); + for (User::const_op_iterator OI = I->op_begin(), + OE = I->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + + for(Module::iterator I = M.begin(), + E = M.end(); I != E; ++I){ + Vals.insert(&*I); + if(!I->isDeclaration()) { + for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); + AI != AE; ++AI) + Vals.insert(&*AI); + for (Function::const_iterator FI = I->begin(), FE = I->end(); + FI != FE; ++FI) + for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + Vals.insert(&*BI); + for (User::const_op_iterator OI = BI->op_begin(), + OE = BI->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } + } + + } + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. 
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Location &LocA, const Location &LocB) { + assert(Vals.find(LocA.Ptr) != Vals.end() && + "Never seen value in AA before"); + assert(Vals.find(LocB.Ptr) != Vals.end() && + "Never seen value in AA before"); + return AliasAnalysis::alias(LocA, LocB); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::getModRefInfo(CS, Loc); + } + + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1,CS2); + } + + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) { + assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + + virtual void deleteValue(Value *V) { + assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); + AliasAnalysis::deleteValue(V); + } + virtual void copyValue(Value *From, Value *To) { + Vals.insert(To); + AliasAnalysis::copyValue(From, To); + } + + }; +} + +char AliasDebugger::ID = 0; +INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", + "AA use debugger", false, true, false) + +Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } + diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp new file mode 100644 index 0000000..2ed6949 --- /dev/null +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -0,0 +1,652 @@ +//===- AliasSetTracker.cpp - Alias Sets Tracker implementation-------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AliasSetTracker and AliasSet classes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// mergeSetIn - Merge the specified alias set into this alias set. +/// +void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { + assert(!AS.Forward && "Alias set is already forwarding!"); + assert(!Forward && "This set is a forwarding set!!"); + + // Update the alias and access types of this set... + AccessTy |= AS.AccessTy; + AliasTy |= AS.AliasTy; + Volatile |= AS.Volatile; + + if (AliasTy == MustAlias) { + // Check that these two merged sets really are must aliases. Since both + // used to be must-alias sets, we can just check any pointer from each set + // for aliasing. + AliasAnalysis &AA = AST.getAliasAnalysis(); + PointerRec *L = getSomePointer(); + PointerRec *R = AS.getSomePointer(); + + // If the pointers are not a must-alias pair, this set becomes a may alias. 
+ if (AA.alias(AliasAnalysis::Location(L->getValue(), + L->getSize(), + L->getTBAAInfo()), + AliasAnalysis::Location(R->getValue(), + R->getSize(), + R->getTBAAInfo())) + != AliasAnalysis::MustAlias) + AliasTy = MayAlias; + } + + if (CallSites.empty()) { // Merge call sites... + if (!AS.CallSites.empty()) + std::swap(CallSites, AS.CallSites); + } else if (!AS.CallSites.empty()) { + CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end()); + AS.CallSites.clear(); + } + + AS.Forward = this; // Forward across AS now... + addRef(); // AS is now pointing to us... + + // Merge the list of constituent pointers... + if (AS.PtrList) { + *PtrListEnd = AS.PtrList; + AS.PtrList->setPrevInList(PtrListEnd); + PtrListEnd = AS.PtrListEnd; + + AS.PtrList = 0; + AS.PtrListEnd = &AS.PtrList; + assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + } +} + +void AliasSetTracker::removeAliasSet(AliasSet *AS) { + if (AliasSet *Fwd = AS->Forward) { + Fwd->dropRef(*this); + AS->Forward = 0; + } + AliasSets.erase(AS); +} + +void AliasSet::removeFromTracker(AliasSetTracker &AST) { + assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!"); + AST.removeAliasSet(this); +} + +void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, + uint64_t Size, const MDNode *TBAAInfo, + bool KnownMustAlias) { + assert(!Entry.hasAliasSet() && "Entry already in set!"); + + // Check to see if we have to downgrade to _may_ alias. + if (isMustAlias() && !KnownMustAlias) + if (PointerRec *P = getSomePointer()) { + AliasAnalysis &AA = AST.getAliasAnalysis(); + AliasAnalysis::AliasResult Result = + AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(), + P->getTBAAInfo()), + AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo)); + if (Result != AliasAnalysis::MustAlias) + AliasTy = MayAlias; + else // First entry of must alias must have maximum size! + P->updateSizeAndTBAAInfo(Size, TBAAInfo); + assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!"); + } + + Entry.setAliasSet(this); + Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + + // Add it to the end of the list... + assert(*PtrListEnd == 0 && "End of list is not null?"); + *PtrListEnd = &Entry; + PtrListEnd = Entry.setPrevInList(PtrListEnd); + assert(*PtrListEnd == 0 && "End of list is not null?"); + addRef(); // Entry points to alias set. +} + +void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { + CallSites.push_back(CS.getInstruction()); + + AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); + if (Behavior == AliasAnalysis::DoesNotAccessMemory) + return; + if (AliasAnalysis::onlyReadsMemory(Behavior)) { + AliasTy = MayAlias; + AccessTy |= Refs; + return; + } + + // FIXME: This should use mod/ref information to make this not suck so bad + AliasTy = MayAlias; + AccessTy = ModRef; +} + +/// aliasesPointer - Return true if the specified pointer "may" (or must) +/// alias one of the members in the set. +/// +bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo, + AliasAnalysis &AA) const { + if (AliasTy == MustAlias) { + assert(CallSites.empty() && "Illegal must alias set!"); + + // If this is a set of MustAliases, only check to see if the pointer aliases + // SOME value in the set. 
+ PointerRec *SomePtr = getSomePointer(); + assert(SomePtr && "Empty must-alias set??"); + return AA.alias(AliasAnalysis::Location(SomePtr->getValue(), + SomePtr->getSize(), + SomePtr->getTBAAInfo()), + AliasAnalysis::Location(Ptr, Size, TBAAInfo)); + } + + // If this is a may-alias set, we have to check all of the pointers in the set + // to be sure it doesn't alias the set... + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo), + AliasAnalysis::Location(I.getPointer(), I.getSize(), + I.getTBAAInfo()))) + return true; + + // Check the call sites list and invoke list... + if (!CallSites.empty()) { + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) + if (AA.getModRefInfo(CallSites[i], + AliasAnalysis::Location(Ptr, Size, TBAAInfo)) != + AliasAnalysis::NoModRef) + return true; + } + + return false; +} + +bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { + if (AA.doesNotAccessMemory(CS)) + return false; + + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef || + AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef) + return true; + } + + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != + AliasAnalysis::NoModRef) + return true; + + return false; +} + +void AliasSetTracker::clear() { + // Delete all the PointerRec entries. + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) + I->second->eraseFromList(); + + PointerMap.clear(); + + // The alias sets should all be clear now. + AliasSets.clear(); +} + + +/// findAliasSetForPointer - Given a pointer, find the one alias set to put the +/// instruction referring to the pointer into. If there are multiple alias sets +/// that may alias the pointer, merge them together and return the unified set. +/// +AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, + uint64_t Size, + const MDNode *TBAAInfo) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; + + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + } + + return FoundSet; +} + +/// containsPointer - Return true if the specified location is represented by +/// this alias set, false otherwise. This does not modify the AST object or +/// alias sets. +bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) + return true; + return false; +} + + + +AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesCallSite(CS, AA)) + continue; + + if (FoundSet == 0) // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + else if (!I->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + return FoundSet; +} + + + + +/// getAliasSetForPointer - Return the alias set that the specified pointer +/// lives in. 
+AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, + const MDNode *TBAAInfo, + bool *New) { + AliasSet::PointerRec &Entry = getEntryFor(Pointer); + + // Check to see if the pointer is already known. + if (Entry.hasAliasSet()) { + Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + // Return the set! + return *Entry.getAliasSet(*this)->getForwardedTarget(*this); + } + + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) { + // Add it to the alias set it aliases. + AS->addPointer(*this, Entry, Size, TBAAInfo); + return *AS; + } + + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer. + AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo); + return AliasSets.back(); +} + +bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { + bool NewPtr; + addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(LoadInst *LI) { + bool NewPtr; + AliasSet &AS = addPointer(LI->getOperand(0), + AA.getTypeStoreSize(LI->getType()), + LI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::Refs, NewPtr); + if (LI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(StoreInst *SI) { + bool NewPtr; + Value *Val = SI->getOperand(0); + AliasSet &AS = addPointer(SI->getOperand(1), + AA.getTypeStoreSize(Val->getType()), + SI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::Mods, NewPtr); + if (SI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(VAArgInst *VAAI) { + bool NewPtr; + addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize, + VAAI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::ModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(CallSite CS) { + if (isa<DbgInfoIntrinsic>(CS.getInstruction())) + return true; // Ignore DbgInfo Intrinsics. + if (AA.doesNotAccessMemory(CS)) + return true; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (AS) { + AS->addCallSite(CS, AA); + return false; + } + AliasSets.push_back(new AliasSet()); + AS = &AliasSets.back(); + AS->addCallSite(CS, AA); + return true; +} + +bool AliasSetTracker::add(Instruction *I) { + // Dispatch to one of the other add methods. + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return add(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return add(SI); + if (CallInst *CI = dyn_cast<CallInst>(I)) + return add(CI); + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + return add(II); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return add(VAAI); + return true; +} + +void AliasSetTracker::add(BasicBlock &BB) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) + add(I); +} + +void AliasSetTracker::add(const AliasSetTracker &AST) { + assert(&AA == &AST.AA && + "Merging AliasSetTracker objects with different Alias Analyses!"); + + // Loop over all of the alias sets in AST, adding the pointers contained + // therein into the current alias sets. This can cause alias sets to be + // merged together in the current AST. + for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) { + if (I->Forward) continue; // Ignore forwarding alias sets + + AliasSet &AS = const_cast<AliasSet&>(*I); + + // If there are any call sites in the alias set, add them to this AST. + for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i) + add(AS.CallSites[i]); + + // Loop over all of the pointers in this alias set. 
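Taken together, the add/findAliasSetForPointer/getAliasSetForPointer interfaces make AliasSetTracker a small worklist-style partitioner: feed it every instruction and it folds the memory they touch into disjoint alias sets, merging sets whenever a new reference bridges two of them. A minimal caller sketch (the helper name is ours; it mirrors the AliasSetPrinter pass at the end of this file):

#include "llvm/Function.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Partition a function's memory references into alias sets and summarize them.
static void summarizeAliasSets(Function &F, AliasAnalysis &AA) {
  AliasSetTracker AST(AA);
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    AST.add(&*I);                       // dispatches on load/store/call/va_arg
  for (AliasSetTracker::iterator S = AST.begin(), SE = AST.end(); S != SE; ++S)
    if (!S->isForwardingAliasSet())
      errs() << (S->isMustAlias() ? "must" : "may") << "-alias set, "
             << (S->isMod() ? "mod" : "no-mod") << "\n";
}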
+ bool X; + for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { + AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), + ASI.getTBAAInfo(), + (AliasSet::AccessType)AS.AccessTy, X); + if (AS.isVolatile()) NewAS.setVolatile(); + } + } +} + +/// remove - Remove the specified (potentially non-empty) alias set from the +/// tracker. +void AliasSetTracker::remove(AliasSet &AS) { + // Drop all call sites. + AS.CallSites.clear(); + + // Clear the alias set. + unsigned NumRefs = 0; + while (!AS.empty()) { + AliasSet::PointerRec *P = AS.PtrList; + + Value *ValToRemove = P->getValue(); + + // Unlink and delete entry from the list of values. + P->eraseFromList(); + + // Remember how many references need to be dropped. + ++NumRefs; + + // Finally, remove the entry. + PointerMap.erase(ValToRemove); + } + + // Stop using the alias set, removing it. + AS.RefCount -= NumRefs; + if (AS.RefCount == 0) + AS.removeFromTracker(*this); +} + +bool +AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { + AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(LoadInst *LI) { + uint64_t Size = AA.getTypeStoreSize(LI->getType()); + const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa); + AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(StoreInst *SI) { + uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa); + AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(VAArgInst *VAAI) { + AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), + AliasAnalysis::UnknownSize, + VAAI->getMetadata(LLVMContext::MD_tbaa)); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(CallSite CS) { + if (AA.doesNotAccessMemory(CS)) + return false; // doesn't alias anything + + AliasSet *AS = findAliasSetForCallSite(CS); + if (!AS) return false; + remove(*AS); + return true; +} + +bool AliasSetTracker::remove(Instruction *I) { + // Dispatch to one of the other remove methods... + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return remove(LI); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return remove(SI); + if (CallInst *CI = dyn_cast<CallInst>(I)) + return remove(CI); + if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) + return remove(VAAI); + return true; +} + + +// deleteValue method - This method is used to remove a pointer value from the +// AliasSetTracker entirely. It should be used when an instruction is deleted +// from the program to update the AST. If you don't use this, you would have +// dangling pointers to deleted instructions. +// +void AliasSetTracker::deleteValue(Value *PtrVal) { + // Notify the alias analysis implementation that this value is gone. + AA.deleteValue(PtrVal); + + // If this is a call instruction, remove the callsite from the appropriate + // AliasSet (if present). + if (CallSite CS = PtrVal) { + if (!AA.doesNotAccessMemory(CS)) { + // Scan all the alias sets to see if this call site is contained. + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward) continue; + + I->removeCallSite(CS); + } + } + } + + // First, look up the PointerRec for this pointer. 
+ PointerMapType::iterator I = PointerMap.find(PtrVal); + if (I == PointerMap.end()) return; // Noop + + // If we found one, remove the pointer from the alias set it is in. + AliasSet::PointerRec *PtrValEnt = I->second; + AliasSet *AS = PtrValEnt->getAliasSet(*this); + + // Unlink and delete from the list of values. + PtrValEnt->eraseFromList(); + + // Stop using the alias set. + AS->dropRef(*this); + + PointerMap.erase(I); +} + +// copyValue - This method should be used whenever a preexisting value in the +// program is copied or cloned, introducing a new value. Note that it is ok for +// clients that use this method to introduce the same value multiple times: if +// the tracker already knows about a value, it will ignore the request. +// +void AliasSetTracker::copyValue(Value *From, Value *To) { + // Notify the alias analysis implementation that this value is copied. + AA.copyValue(From, To); + + // First, look up the PointerRec for this pointer. + PointerMapType::iterator I = PointerMap.find(From); + if (I == PointerMap.end()) + return; // Noop + assert(I->second->hasAliasSet() && "Dead entry?"); + + AliasSet::PointerRec &Entry = getEntryFor(To); + if (Entry.hasAliasSet()) return; // Already in the tracker! + + // Add it to the alias set it aliases... + I = PointerMap.find(From); + AliasSet *AS = I->second->getAliasSet(*this); + AS->addPointer(*this, Entry, I->second->getSize(), + I->second->getTBAAInfo(), + true); +} + + + +//===----------------------------------------------------------------------===// +// AliasSet/AliasSetTracker Printing Support +//===----------------------------------------------------------------------===// + +void AliasSet::print(raw_ostream &OS) const { + OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; + OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; + switch (AccessTy) { + case NoModRef: OS << "No access "; break; + case Refs : OS << "Ref "; break; + case Mods : OS << "Mod "; break; + case ModRef : OS << "Mod/Ref "; break; + default: llvm_unreachable("Bad value for AccessTy!"); + } + if (isVolatile()) OS << "[volatile] "; + if (Forward) + OS << " forwarding to " << (void*)Forward; + + + if (!empty()) { + OS << "Pointers: "; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I != begin()) OS << ", "; + WriteAsOperand(OS << "(", I.getPointer()); + OS << ", " << I.getSize() << ")"; + } + } + if (!CallSites.empty()) { + OS << "\n " << CallSites.size() << " Call Sites: "; + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (i) OS << ", "; + WriteAsOperand(OS, CallSites[i]); + } + } + OS << "\n"; +} + +void AliasSetTracker::print(raw_ostream &OS) const { + OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " + << PointerMap.size() << " pointer values.\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) + I->print(OS); + OS << "\n"; +} + +void AliasSet::dump() const { print(dbgs()); } +void AliasSetTracker::dump() const { print(dbgs()); } + +//===----------------------------------------------------------------------===// +// ASTCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteValue(getValPtr()); + // this now dangles! 
+} + +void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) { + AST->copyValue(getValPtr(), V); +} + +AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTCallbackVH & +AliasSetTracker::ASTCallbackVH::operator=(Value *V) { + return *this = ASTCallbackVH(V, AST); +} + +//===----------------------------------------------------------------------===// +// AliasSetPrinter Pass +//===----------------------------------------------------------------------===// + +namespace { + class AliasSetPrinter : public FunctionPass { + AliasSetTracker *Tracker; + public: + static char ID; // Pass identification, replacement for typeid + AliasSetPrinter() : FunctionPass(ID) { + initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + } + + virtual bool runOnFunction(Function &F) { + Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>()); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + Tracker->add(&*I); + Tracker->print(errs()); + delete Tracker; + return false; + } + }; +} + +char AliasSetPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true) diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp new file mode 100644 index 0000000..71e0a83 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -0,0 +1,104 @@ +//===-- Analysis.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Analysis.h" +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/Verifier.h" +#include <cstring> + +using namespace llvm; + +/// initializeAnalysis - Initialize all passes linked into the Analysis library. 
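initializeAnalysis registers the metadata for every pass in this library with a PassRegistry; LLVMInitializeAnalysis below is its C wrapper. A hedged sketch of a host calling it up front (initializeCore is assumed to exist alongside initializeAnalysis in llvm/InitializePasses.h and is not part of this diff):

#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;

// Register pass metadata once, before building any PassManager.
static void registerAnalysisPasses() {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCore(Registry);      // assumed companion initializer
  initializeAnalysis(Registry);
}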
+void llvm::initializeAnalysis(PassRegistry &Registry) { + initializeAliasAnalysisAnalysisGroup(Registry); + initializeAliasAnalysisCounterPass(Registry); + initializeAAEvalPass(Registry); + initializeAliasDebuggerPass(Registry); + initializeAliasSetPrinterPass(Registry); + initializeNoAAPass(Registry); + initializeBasicAliasAnalysisPass(Registry); + initializeBlockFrequencyPass(Registry); + initializeBranchProbabilityInfoPass(Registry); + initializeCFGViewerPass(Registry); + initializeCFGPrinterPass(Registry); + initializeCFGOnlyViewerPass(Registry); + initializeCFGOnlyPrinterPass(Registry); + initializePrintDbgInfoPass(Registry); + initializeDominanceFrontierPass(Registry); + initializeDomViewerPass(Registry); + initializeDomPrinterPass(Registry); + initializeDomOnlyViewerPass(Registry); + initializePostDomViewerPass(Registry); + initializeDomOnlyPrinterPass(Registry); + initializePostDomPrinterPass(Registry); + initializePostDomOnlyViewerPass(Registry); + initializePostDomOnlyPrinterPass(Registry); + initializeIVUsersPass(Registry); + initializeInstCountPass(Registry); + initializeIntervalPartitionPass(Registry); + initializeLazyValueInfoPass(Registry); + initializeLibCallAliasAnalysisPass(Registry); + initializeLintPass(Registry); + initializeLoopDependenceAnalysisPass(Registry); + initializeLoopInfoPass(Registry); + initializeMemDepPrinterPass(Registry); + initializeMemoryDependenceAnalysisPass(Registry); + initializeModuleDebugInfoPrinterPass(Registry); + initializePostDominatorTreePass(Registry); + initializeProfileEstimatorPassPass(Registry); + initializeNoProfileInfoPass(Registry); + initializeNoPathProfileInfoPass(Registry); + initializeProfileInfoAnalysisGroup(Registry); + initializePathProfileInfoAnalysisGroup(Registry); + initializeLoaderPassPass(Registry); + initializePathProfileLoaderPassPass(Registry); + initializeProfileVerifierPassPass(Registry); + initializePathProfileVerifierPass(Registry); + initializeRegionInfoPass(Registry); + initializeRegionViewerPass(Registry); + initializeRegionPrinterPass(Registry); + initializeRegionOnlyViewerPass(Registry); + initializeRegionOnlyPrinterPass(Registry); + initializeScalarEvolutionPass(Registry); + initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeTypeBasedAliasAnalysisPass(Registry); +} + +void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { + initializeAnalysis(*unwrap(R)); +} + +LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, + char **OutMessages) { + std::string Messages; + + LLVMBool Result = verifyModule(*unwrap(M), + static_cast<VerifierFailureAction>(Action), + OutMessages? 
&Messages : 0); + + if (OutMessages) + *OutMessages = strdup(Messages.c_str()); + + return Result; +} + +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { + return verifyFunction(*unwrap<Function>(Fn), + static_cast<VerifierFailureAction>(Action)); +} + +void LLVMViewFunctionCFG(LLVMValueRef Fn) { + Function *F = unwrap<Function>(Fn); + F->viewCFG(); +} + +void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) { + Function *F = unwrap<Function>(Fn); + F->viewCFGOnly(); +} diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp new file mode 100644 index 0000000..8330ea7 --- /dev/null +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -0,0 +1,1213 @@ +//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the primary stateless implementation of the +// Alias Analysis interface that implements identities (two different +// globals cannot alias, etc), but does no stateful analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include <algorithm> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Useful predicates +//===----------------------------------------------------------------------===// + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +static bool isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa<AllocaInst>(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast<Argument>(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} + +/// isNonEscapingLocalObject - Return true if the pointer is to a function-local +/// object that never escapes from the function. +static bool isNonEscapingLocalObject(const Value *V) { + // If this is a local allocation, check to see if it escapes. + if (isa<AllocaInst>(V) || isNoAliasCall(V)) + // Set StoreCaptures to True so that we can assume in our callers that the + // pointer is not the result of a load instruction. Currently + // PointerMayBeCaptured doesn't have any special analysis for the + // StoreCaptures=false case; if it did, our callers could be refined to be + // more precise. 
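    // For instance (an illustration of PointerMayBeCaptured's usual
    // behavior, not a guarantee made by this file): an alloca that is only
    // loaded from and stored through stays non-escaping, while one whose own
    // address is stored to memory or passed to a callee lacking the
    // nocapture attribute is treated as captured and fails this test.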
+ return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + + // If this is an argument that corresponds to a byval or noalias argument, + // then it has not escaped before entering the function. Check if it escapes + // inside the function. + if (const Argument *A = dyn_cast<Argument>(V)) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { + // Don't bother analyzing arguments already known not to escape. + if (A->hasNoCaptureAttr()) + return true; + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + } + return false; +} + +/// isEscapeSource - Return true if the pointer is one which would have +/// been considered an escape by isNonEscapingLocalObject. +static bool isEscapeSource(const Value *V) { + if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V)) + return true; + + // The load case works because isNonEscapingLocalObject considers all + // stores to be escapes (it passes true for the StoreCaptures argument + // to PointerMayBeCaptured). + if (isa<LoadInst>(V)) + return true; + + return false; +} + +/// getObjectSize - Return the size of the object specified by V, or +/// UnknownSize if unknown. +static uint64_t getObjectSize(const Value *V, const TargetData &TD) { + const Type *AccessTy; + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (!GV->hasDefinitiveInitializer()) + return AliasAnalysis::UnknownSize; + AccessTy = GV->getType()->getElementType(); + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + if (!AI->isArrayAllocation()) + AccessTy = AI->getType()->getElementType(); + else + return AliasAnalysis::UnknownSize; + } else if (const CallInst* CI = extractMallocCall(V)) { + if (!isArrayMalloc(V, &TD)) + // The size is the argument to the malloc call. + if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) + return C->getZExtValue(); + return AliasAnalysis::UnknownSize; + } else if (const Argument *A = dyn_cast<Argument>(V)) { + if (A->hasByValAttr()) + AccessTy = cast<PointerType>(A->getType())->getElementType(); + else + return AliasAnalysis::UnknownSize; + } else { + return AliasAnalysis::UnknownSize; + } + + if (AccessTy->isSized()) + return TD.getTypeAllocSize(AccessTy); + return AliasAnalysis::UnknownSize; +} + +/// isObjectSmallerThan - Return true if we can prove that the object specified +/// by V is smaller than Size. +static bool isObjectSmallerThan(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; +} + +/// isObjectSize - Return true if we can prove that the object specified +/// by V has size Size. +static bool isObjectSize(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; +} + +//===----------------------------------------------------------------------===// +// GetElementPtr Instruction Decomposition and Analysis +//===----------------------------------------------------------------------===// + +namespace { + enum ExtensionKind { + EK_NotExtended, + EK_SignExt, + EK_ZeroExt + }; + + struct VariableGEPIndex { + const Value *V; + ExtensionKind Extension; + int64_t Scale; + }; +} + + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. 
Return the scale and offset +/// values as APInts and return V as a Value*, and return whether we looked +/// through any sign or zero extends. The incoming Value is known to have +/// IntegerType and it may already be sign or zero extended. +/// +/// Note that this looks through extends, so the high bits may not be +/// represented in the result. +static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + ExtensionKind &Extension, + const TargetData &TD, unsigned Depth) { + assert(V->getType()->isIntegerTy() && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { + if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + break; + // FALL THROUGH. + case Instruction::Add: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset += RHSC->getValue(); + return V; + case Instruction::Mul: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset *= RHSC->getValue(); + Scale *= RHSC->getValue(); + return V; + case Instruction::Shl: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset <<= RHSC->getValue().getLimitedValue(); + Scale <<= RHSC->getValue().getLimitedValue(); + return V; + } + } + } + + // Since GEP indices are sign extended anyway, we don't care about the high + // bits of a sign or zero extended value - just scales and offsets. The + // extensions have to be consistent though. + if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) || + (isa<ZExtInst>(V) && Extension != EK_SignExt)) { + Value *CastOp = cast<CastInst>(V)->getOperand(0); + unsigned OldWidth = Scale.getBitWidth(); + unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); + Scale = Scale.trunc(SmallWidth); + Offset = Offset.trunc(SmallWidth); + Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; + + Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, + TD, Depth+1); + Scale = Scale.zext(OldWidth); + Offset = Offset.zext(OldWidth); + + return Result; + } + + Scale = 1; + Offset = 0; + return V; +} + +/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it +/// into a base pointer with a constant offset and a number of scaled symbolic +/// offsets. +/// +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in +/// the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As such, +/// the gep cannot necessarily be reconstructed from its decomposed form. +/// +/// When TargetData is around, this function is capable of analyzing everything +/// that GetUnderlyingObject can look through. When not, it just looks +/// through pointer casts. +/// +static const Value * +DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, + const TargetData *TD) { + // Limit recursion depth to limit compile time in crazy cases. + unsigned MaxLookup = 6; + + BaseOffs = 0; + do { + // See if this is a bitcast or GEP. 
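    // Worked example (a sketch, assuming a 64-bit target with TargetData):
    //   %p = getelementptr inbounds [10 x i32]* %A, i64 0, i64 %i
    // decomposes to BaseOffs = 0 plus one VariableGEPIndex
    //   { V = %i, Extension = EK_NotExtended, Scale = 4 },
    // while the constant
    //   %q = getelementptr i8* %P, i64 12
    // decomposes to BaseOffs = 12 with no variable indices.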
+ const Operator *Op = dyn_cast<Operator>(V); + if (Op == 0) { + // The only non-operator case we can handle are GlobalAliases. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (!GA->mayBeOverridden()) { + V = GA->getAliasee(); + continue; + } + } + return V; + } + + if (Op->getOpcode() == Instruction::BitCast) { + V = Op->getOperand(0); + continue; + } + + const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); + if (GEPOp == 0) { + // If it's not a GEP, hand it off to SimplifyInstruction to see if it + // can come up with something. This matches what GetUnderlyingObject does. + if (const Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Get a DominatorTree and use it here. + if (const Value *Simplified = + SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + V = Simplified; + continue; + } + + return V; + } + + // Don't attempt to analyze GEPs over unsized objects. + if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) + ->getElementType()->isSized()) + return V; + + // If we are lacking TargetData information, we can't compute the offets of + // elements computed by GEPs. However, we can handle bitcast equivalent + // GEPs. + if (TD == 0) { + if (!GEPOp->hasAllZeroIndices()) + return V; + V = GEPOp->getOperand(0); + continue; + } + + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. + gep_type_iterator GTI = gep_type_begin(GEPOp); + for (User::const_op_iterator I = GEPOp->op_begin()+1, + E = GEPOp->op_end(); I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + if (FieldNo == 0) continue; + + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + continue; + } + + // For an array/pointer, add the element offset, explicitly scaled. + if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { + if (CIdx->isZero()) continue; + BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + continue; + } + + uint64_t Scale = TD->getTypeAllocSize(*GTI); + ExtensionKind Extension = EK_NotExtended; + + // If the integer type is smaller than the pointer size, it is implicitly + // sign extended to pointer size. + unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth(); + if (TD->getPointerSizeInBits() > Width) + Extension = EK_SignExt; + + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. + APInt IndexScale(Width, 0), IndexOffset(Width, 0); + Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, + *TD, 0); + + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. + // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. + BaseOffs += IndexOffset.getSExtValue()*Scale; + Scale *= IndexScale.getSExtValue(); + + + // If we already had an occurrence of this index variable, merge this + // scale into it. For example, we want to handle: + // A[x][x] -> x*16 + x*4 -> x*20 + // This also ensures that 'x' only appears in the index list once. + for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { + if (VarIndices[i].V == Index && + VarIndices[i].Extension == Extension) { + Scale += VarIndices[i].Scale; + VarIndices.erase(VarIndices.begin()+i); + break; + } + } + + // Make sure that we have a scale that makes sense for this target's + // pointer size. 
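      // Numeric illustration (a sketch): with 32-bit pointers ShiftBits is
      // 32, so a Scale of 0xFFFFFFFC accumulated in 64-bit arithmetic is
      // reinterpreted as -4 and a Scale of 0x100000004 as 4, i.e. the scale
      // is sign-extended from the target's pointer width.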
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { + Scale <<= ShiftBits; + Scale = (int64_t)Scale >> ShiftBits; + } + + if (Scale) { + VariableGEPIndex Entry = {Index, Extension, Scale}; + VarIndices.push_back(Entry); + } + } + + // Analyze the base pointer next. + V = GEPOp->getOperand(0); + } while (--MaxLookup); + + // If the chain of expressions is too deep, just return early. + return V; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src) { + if (Src.empty()) return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (Dest[j].V != V || Dest[j].Extension != Extension) continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry. + if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin()+j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} + +//===----------------------------------------------------------------------===// +// BasicAliasAnalysis Pass +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast<Instruction>(V)) + return inst->getParent()->getParent(); + + if (const Argument *arg = dyn_cast<Argument>(V)) + return arg->getParent(); + + return NULL; +} + +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + +namespace { + /// BasicAliasAnalysis - This is the primary alias analysis implementation. + struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + BasicAliasAnalysis() : ImmutablePass(ID), + // AliasCache rarely has more than 1 or 2 elements, + // so start it off fairly small so that clear() + // doesn't have to tromp through 64 (the default) + // elements on each alias query. This really wants + // something like a SmallDenseMap. 
+ AliasCache(8) { + initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + } + + virtual AliasResult alias(const Location &LocA, + const Location &LocB) { + assert(AliasCache.empty() && "AliasCache must be cleared after use!"); + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && + "BasicAliasAnalysis doesn't support interprocedural queries."); + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, + LocB.Ptr, LocB.Size, LocB.TBAATag); + AliasCache.clear(); + return Alias; + } + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // The AliasAnalysis base class has some smarts, lets use them. + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// pointsToConstantMemory - Chase pointers until we find a (constant + /// global) or not. + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + + /// getModRefBehavior - Return the behavior when calling the given + /// call site. + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// getModRefBehavior - Return the behavior when calling the given function. + /// For use when the call site is not known. + virtual ModRefBehavior getModRefBehavior(const Function *F); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + // AliasCache - Track alias queries to guard against recursion. + typedef std::pair<Location, Location> LocPair; + typedef DenseMap<LocPair, AliasResult> AliasCacheTy; + AliasCacheTy AliasCache; + + // Visited - Track instructions visited by pointsToConstantMemory. + SmallPtrSet<const Value*, 16> Visited; + + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP + // instruction against another. + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI + // instruction against another. + AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + /// aliasSelect - Disambiguate a Select instruction against another value. + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAATag, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAATag); + }; +} // End of anonymous namespace + +// Register this pass... 
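// An illustrative usage sketch (not taken from this patch; createLICMPass is
// used purely as an example consumer): clients schedule this implementation
// ahead of any pass that queries the AliasAnalysis group, e.g. with the
// legacy pass manager:
//
//   llvm::PassManager PM;                          // requires llvm/PassManager.h
//   PM.add(llvm::createBasicAliasAnalysisPass());  // declared in Analysis/Passes.h
//   PM.add(llvm::createLICMPass());                // any AA consumer
//   PM.run(M);                                     // M is the Module being optimized
//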
+char BasicAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa", + "Basic Alias Analysis (stateless AA impl)", + false, true, false) + +ImmutablePass *llvm::createBasicAliasAnalysisPass() { + return new BasicAliasAnalysis(); +} + +/// pointsToConstantMemory - Returns whether the given pointer value +/// points to memory that is local to the function, with global constants being +/// considered local to all functions. +bool +BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { + assert(Visited.empty() && "Visited must be cleared after use!"); + + unsigned MaxLookup = 8; + SmallVector<const Value *, 16> Worklist; + Worklist.push_back(Loc.Ptr); + do { + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD); + if (!Visited.insert(V)) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + + // An alloca instruction defines local memory. + if (OrLocal && isa<AllocaInst>(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + // Note: this doesn't require GV to be "ODR" because it isn't legal for a + // global to be marked constant in some modules and non-constant in + // others. GV may even be a declaration, not a definition. + if (!GV->isConstant()) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + continue; + } + + // If both select values point to local memory, then so does the select. + if (const SelectInst *SI = dyn_cast<SelectInst>(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. + if (const PHINode *PN = dyn_cast<PHINode>(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + // Otherwise be conservative. + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + } while (!Worklist.empty() && --MaxLookup); + + Visited.clear(); + return Worklist.empty(); +} + +/// getModRefBehavior - Return the behavior when calling the given call site. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (CS.doesNotAccessMemory()) + // Can't do better than this. + return DoesNotAccessMemory; + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the callsite knows it only reads memory, don't return worse + // than that. + if (CS.onlyReadsMemory()) + Min = OnlyReadsMemory; + + // The AliasAnalysis base class has some smarts, lets use them. + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +/// getModRefBehavior - Return the behavior when calling the given function. +/// For use when the call site is not known. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(const Function *F) { + // If the function declares it doesn't access memory, we can't do better. + if (F->doesNotAccessMemory()) + return DoesNotAccessMemory; + + // For intrinsics, we can check the table. 
+ if (unsigned iid = F->getIntrinsicID()) { +#define GET_INTRINSIC_MODREF_BEHAVIOR +#include "llvm/Intrinsics.gen" +#undef GET_INTRINSIC_MODREF_BEHAVIOR + } + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the function declares it only reads memory, go with that. + if (F->onlyReadsMemory()) + Min = OnlyReadsMemory; + + // Otherwise be conservative. + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); +} + +/// getModRefInfo - Check to see if the specified callsite can clobber the +/// specified memory object. Since we only look at local properties of this +/// function, we really can't say much about this query. We do, however, use +/// simple "address taken" analysis on local objects. +AliasAnalysis::ModRefResult +BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) && + "AliasAnalysis query involving multiple functions!"); + + const Value *Object = GetUnderlyingObject(Loc.Ptr, TD); + + // If this is a tail call and Loc.Ptr points to a stack location, we know that + // the tail call cannot access or modify the local stack. + // We cannot exclude byval arguments here; these belong to the caller of + // the current function not to the current function, and a tail callee + // may reference them. + if (isa<AllocaInst>(Object)) + if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) + if (CI->isTailCall()) + return NoModRef; + + // If the pointer is to a locally allocated object that does not escape, + // then the call can not mod/ref the pointer unless the call takes the pointer + // as an argument, and itself doesn't capture it. + if (!isa<Constant>(Object) && CS.getInstruction() != Object && + isNonEscapingLocalObject(Object)) { + bool PassedAsArg = false; + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && + !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!isNoAlias(Location(cast<Value>(CI)), Loc)) { + PassedAsArg = true; + break; + } + } + + if (!PassedAsArg) + return NoModRef; + } + + ModRefResult Min = ModRef; + + // Finally, handle specific knowledge of intrinsics. + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); + if (II != 0) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + uint64_t Len = UnknownSize; + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) + Len = LenCI->getZExtValue(); + Value *Dest = II->getArgOperand(0); + Value *Src = II->getArgOperand(1); + // If it can't overlap the source dest, then it doesn't modref the loc. + if (isNoAlias(Location(Dest, Len), Loc)) { + if (isNoAlias(Location(Src, Len), Loc)) + return NoModRef; + // If it can't overlap the dest, then worst case it reads the loc. + Min = Ref; + } else if (isNoAlias(Location(Src, Len), Loc)) { + // If it can't overlap the source, then worst case it mutates the loc. 
+ Min = Mod; + } + break; + } + case Intrinsic::memset: + // Since memset is 'accesses arguments' only, the AliasAnalysis base class + // will handle it for the variable length case. + if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) { + uint64_t Len = LenCI->getZExtValue(); + Value *Dest = II->getArgOperand(0); + if (isNoAlias(Location(Dest, Len), Loc)) + return NoModRef; + } + // We know that memset doesn't load anything. + Min = Mod; + break; + case Intrinsic::atomic_cmp_swap: + case Intrinsic::atomic_swap: + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: + if (TD) { + Value *Op1 = II->getArgOperand(0); + uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType()); + MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa); + if (isNoAlias(Location(Op1, Op1Size, Tag), Loc)) + return NoModRef; + } + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: { + uint64_t PtrSize = + cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); + if (isNoAlias(Location(II->getArgOperand(1), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::invariant_end: { + uint64_t PtrSize = + cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(); + if (isNoAlias(Location(II->getArgOperand(2), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::arm_neon_vld1: { + // LLVM's vld1 and vst1 intrinsics currently only support a single + // vector register. + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::arm_neon_vst1: { + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + } + + // The AliasAnalysis base class has some smarts, lets use them. + return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); +} + +/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction +/// against another pointer. We know that V1 is a GEP, but we don't know +/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), +/// UnderlyingV2 is the same for V2. +/// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, + const Value *UnderlyingV2) { + int64_t GEP1BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; + + // If we have two gep instructions with must-alias'ing base pointers, figure + // out if the indexes to the GEP tell us anything about the derived pointer. + if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + + // If we get a No or May, then return it immediately, no amount of analysis + // will improve this situation. 
+ if (BaseAlias != MustAlias) return BaseAlias; + + // Otherwise, we have a MustAlias. Since the base pointers alias each other + // exactly, see if the computed offset from the common pointer tells us + // about the relation of the resulting pointer. + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + int64_t GEP2BaseOffset; + SmallVector<VariableGEPIndex, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + + // Subtract the GEP2 pointer from the GEP1 pointer to find out their + // symbolic difference. + GEP1BaseOffset -= GEP2BaseOffset; + GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices); + + } else { + // Check to see if these two pointers are related by the getelementptr + // instruction. If one pointer is a GEP with a non-zero index of the other + // pointer, we know they cannot alias. + + // If both accesses are unknown size, we can't do anything useful here. + if (V1Size == UnknownSize && V2Size == UnknownSize) + return MayAlias; + + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + V2, V2Size, V2TBAAInfo); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. + // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. + if (GEP1BasePtr != UnderlyingV1) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + } + + // In the two GEP Case, if there is no difference in the offsets of the + // computed pointers, the resultant pointers are a must alias. This + // hapens when we have two lexically identical GEP's (for example). + // + // In the other case, if we have getelementptr <ptr>, 0, 0, 0, 0, ... and V2 + // must aliases the GEP, the end result is a must alias also. + if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) + return MustAlias; + + // If there is a difference between the pointers, but the difference is + // less than the size of the associated memory object, then we know + // that the objects are partially overlapping. + if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size && + GEP1BaseOffset != INT64_MIN)) + return PartialAlias; + } + + // If we have a known constant offset, see if this offset is larger than the + // access size being queried. If so, and if no variable indices can remove + // pieces of this constant, then we know we have a no-alias. For example, + // &A[100] != &A. 
+ + // In order to handle cases like &A[100][i] where i is an out of range + // subscript, we have to ignore all constant offset pieces that are a multiple + // of a scaled index. Do this by removing constant offsets that are a + // multiple of any of our variable indices. This allows us to transform + // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1 + // provides an offset of 4 bytes (assuming a <= 4 byte access). + for (unsigned i = 0, e = GEP1VariableIndices.size(); + i != e && GEP1BaseOffset;++i) + if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale) + GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale; + + // If our known offset is bigger than the access size, we know we don't have + // an alias. + if (GEP1BaseOffset) { + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size && + GEP1BaseOffset != INT64_MIN)) + return NoAlias; + } + + // Statically, we can see that the base objects are the same, but the + // pointers have dynamic offsets which we can't resolve. And none of our + // little tricks above worked. + // + // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the + // practical effect of this is protecting TBAA in the case of dynamic + // indices into arrays of unions. An alternative way to solve this would + // be to have clang emit extra metadata for unions and/or union accesses. + // A union-specific solution wouldn't handle the problem for malloc'd + // memory however. + return PartialAlias; +} + +static AliasAnalysis::AliasResult +MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) { + // If the results agree, take it. + if (A == B) + return A; + // A mix of PartialAlias and MustAlias is PartialAlias. + if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) || + (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias)) + return AliasAnalysis::PartialAlias; + // Otherwise, we don't know anything. + return AliasAnalysis::MayAlias; +} + +/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select +/// instruction against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If the values are Selects with the same condition, we can do a more precise + // check: just check for aliases between the values on corresponding arms. + if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) + if (SI->getCondition() == SI2->getCondition()) { + AliasResult Alias = + aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo, + SI2->getTrueValue(), V2Size, V2TBAAInfo); + if (Alias == MayAlias) + return MayAlias; + AliasResult ThisAlias = + aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, + SI2->getFalseValue(), V2Size, V2TBAAInfo); + return MergeAliasResults(ThisAlias, Alias); + } + + // If both arms of the Select node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + AliasResult Alias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo); + if (Alias == MayAlias) + return MayAlias; + + AliasResult ThisAlias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); + return MergeAliasResults(ThisAlias, Alias); +} + +// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction +// against another. 
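// Illustrative IR (a sketch): given two PHIs in the same block,
//   %p = phi i32* [ %a, %bb1 ], [ %b, %bb2 ]
//   %q = phi i32* [ %x, %bb1 ], [ %y, %bb2 ]
// the code below compares incoming values edge by edge, merging
// alias(%a, %x) with alias(%b, %y) through MergeAliasResults; any
// combination other than agreement or MustAlias/PartialAlias collapses
// to MayAlias.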
+AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If the values are PHIs in the same block, we can do a more precise + // as well as efficient check: just check for aliases between the values + // on corresponding edges. + if (const PHINode *PN2 = dyn_cast<PHINode>(V2)) + if (PN2->getParent() == PN->getParent()) { + AliasResult Alias = + aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), + V2Size, V2TBAAInfo); + if (Alias == MayAlias) + return MayAlias; + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + AliasResult ThisAlias = + aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), + V2Size, V2TBAAInfo); + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; + } + return Alias; + } + + SmallPtrSet<Value*, 4> UniqueSrc; + SmallVector<Value*, 4> V1Srcs; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PV1 = PN->getIncomingValue(i); + if (isa<PHINode>(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + if (UniqueSrc.insert(PV1)) + V1Srcs.push_back(PV1); + } + + AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo, + V1Srcs[0], PNSize, PNTBAAInfo); + // Early exit if the check of the first PHI source against V2 is MayAlias. + // Other results are not possible. + if (Alias == MayAlias) + return MayAlias; + + // If all sources of the PHI node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { + Value *V = V1Srcs[i]; + + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, + V, PNSize, PNTBAAInfo); + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; + } + + return Alias; +} + +// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, +// such as array references. +// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. + if (V1Size == 0 || V2Size == 0) + return NoAlias; + + // Strip off any casts if they exist. + V1 = V1->stripPointerCasts(); + V2 = V2->stripPointerCasts(); + + // Are we checking for alias of the same value? + if (V1 == V2) return MustAlias; + + if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) + return NoAlias; // Scalars cannot alias each other + + // Figure out what objects these things are pointing to if we can. + const Value *O1 = GetUnderlyingObject(V1, TD); + const Value *O2 = GetUnderlyingObject(V2, TD); + + // Null values in the default address space don't point to any object, so they + // don't alias any other pointer. 
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + + if (O1 != O2) { + // If V1/V2 point to two different objects we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return NoAlias; + + // Constant pointers can't alias with non-const isIdentifiedObject objects. + if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) || + (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1))) + return NoAlias; + + // Arguments can't alias with local allocations or noalias calls + // in the same function. + if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) || + (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))) + return NoAlias; + + // Most objects can't alias null. + if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) || + (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2))) + return NoAlias; + + // If one pointer is the result of a call/invoke or load and the other is a + // non-escaping local object within the same function, then we know the + // object couldn't escape to a point where the call could return it. + // + // Note that if the pointers are in different functions, there are a + // variety of complications. A call with a nocapture argument may still + // temporary store the nocapture argument's value in a temporary memory + // location if that memory location doesn't escape. Or it may pass a + // nocapture value to other functions as long as they don't capture it. + if (isEscapeSource(O1) && isNonEscapingLocalObject(O2)) + return NoAlias; + if (isEscapeSource(O2) && isNonEscapingLocalObject(O1)) + return NoAlias; + } + + // If the size of one access is larger than the entire object on the other + // side, then we know such behavior is undefined and can assume no alias. + if (TD) + if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) || + (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD))) + return NoAlias; + + // Check the cache before climbing up use-def chains. This also terminates + // otherwise infinitely recursive queries. + LocPair Locs(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (V1 > V2) + std::swap(Locs.first, Locs.second); + std::pair<AliasCacheTy::iterator, bool> Pair = + AliasCache.insert(std::make_pair(Locs, MayAlias)); + if (!Pair.second) + return Pair.first->second; + + // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the + // GEP can't simplify, we don't even look at the PHI cases. 
+ if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + std::swap(O1, O2); + } + if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { + AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (const PHINode *PN = dyn_cast<PHINode>(V1)) { + AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { + AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, + V2, V2Size, V2TBAAInfo); + if (Result != MayAlias) return AliasCache[Locs] = Result; + } + + // If both pointers are pointing into the same object and one of them + // accesses is accessing the entire object, then the accesses must + // overlap in some way. + if (TD && O1 == O2) + if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || + (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) + return AliasCache[Locs] = PartialAlias; + + AliasResult Result = + AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + return AliasCache[Locs] = Result; +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequency.cpp b/contrib/llvm/lib/Analysis/BlockFrequency.cpp new file mode 100644 index 0000000..4b86d1d --- /dev/null +++ b/contrib/llvm/lib/Analysis/BlockFrequency.cpp @@ -0,0 +1,59 @@ +//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequency.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis", + true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis", + true, true) + +char BlockFrequency::ID = 0; + + +BlockFrequency::BlockFrequency() : FunctionPass(ID) { + initializeBlockFrequencyPass(*PassRegistry::getPassRegistry()); + BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); +} + +BlockFrequency::~BlockFrequency() { + delete BFI; +} + +void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BranchProbabilityInfo>(); + AU.setPreservesAll(); +} + +bool BlockFrequency::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); + BFI->doFunction(&F, &BPI); + return false; +} + +/// getblockFreq - Return block frequency. Never return 0, value must be +/// positive. Please note that initial frequency is equal to 1024. It means that +/// we should not rely on the value itself, but only on the comparison to the +/// other block frequencies. 
We do this to avoid using of floating points. +/// +uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) { + return BFI->getBlockFreq(BB); +} diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp new file mode 100644 index 0000000..e39cd22 --- /dev/null +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -0,0 +1,363 @@ +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) + +char BranchProbabilityInfo::ID = 0; + +namespace { +// Please note that BranchProbabilityAnalysis is not a FunctionPass. +// It is created by BranchProbabilityInfo (which is a FunctionPass), which +// provides a clear interface. Thanks to that, all heuristics and other +// private methods are hidden in the .cpp file. +class BranchProbabilityAnalysis { + + typedef std::pair<BasicBlock *, BasicBlock *> Edge; + + DenseMap<Edge, uint32_t> *Weights; + + BranchProbabilityInfo *BP; + + LoopInfo *LI; + + + // Weights are for internal use only. They are used by heuristics to help to + // estimate edges' probability. Example: + // + // Using "Loop Branch Heuristics" we predict weights of edges for the + // block BB2. + // ... + // | + // V + // BB1<-+ + // | | + // | | (Weight = 128) + // V | + // BB2--+ + // | + // | (Weight = 4) + // V + // BB3 + // + // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696.. + // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303.. + + static const uint32_t LBH_TAKEN_WEIGHT = 128; + static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + + // Standard weight value. Used when none of the heuristics set weight for + // the edge. + static const uint32_t NORMAL_WEIGHT = 16; + + // Minimum weight of an edge. Please note, that weight is NEVER 0. + static const uint32_t MIN_WEIGHT = 1; + + // Return TRUE if BB leads directly to a Return Instruction. + static bool isReturningBlock(BasicBlock *BB) { + SmallPtrSet<BasicBlock *, 8> Visited; + + while (true) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + return true; + + if (TI->getNumSuccessors() > 1) + break; + + // It is unreachable block which we can consider as a return instruction. + if (TI->getNumSuccessors() == 0) + return true; + + Visited.insert(BB); + BB = TI->getSuccessor(0); + + // Stop if cycle is detected. + if (Visited.count(BB)) + return false; + } + + return false; + } + + // Multiply Edge Weight by two. 
+ void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + uint32_t MaxWeight = getMaxWeightFor(Src); + + if (Weight * 2 > MaxWeight) + BP->setEdgeWeight(Src, Dst, MaxWeight); + else + BP->setEdgeWeight(Src, Dst, Weight * 2); + } + + // Divide Edge Weight by two. + void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + + assert(Weight > 0); + if (Weight / 2 < MIN_WEIGHT) + BP->setEdgeWeight(Src, Dst, MIN_WEIGHT); + else + BP->setEdgeWeight(Src, Dst, Weight / 2); + } + + + uint32_t getMaxWeightFor(BasicBlock *BB) const { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); + } + +public: + BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W, + BranchProbabilityInfo *BP, LoopInfo *LI) + : Weights(W), BP(BP), LI(LI) { + } + + // Return Heuristics + void calcReturnHeuristics(BasicBlock *BB); + + // Pointer Heuristics + void calcPointerHeuristics(BasicBlock *BB); + + // Loop Branch Heuristics + void calcLoopBranchHeuristics(BasicBlock *BB); + + bool runOnFunction(Function &F); +}; +} // end anonymous namespace + +// Calculate Edge Weights using "Return Heuristics". Predict a successor which +// leads directly to Return Instruction will not be taken. +void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ + if (BB->getTerminator()->getNumSuccessors() == 1) + return; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + if (isReturningBlock(Succ)) { + decEdgeWeight(BB, Succ); + } + } +} + +// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion +// between two pointer or pointer and NULL will fail. +void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI || !CI->isEquality()) + return; + + Value *LHS = CI->getOperand(0); + + if (!LHS->getType()->isPointerTy()) + return; + + assert(CI->getOperand(1)->getType()->isPointerTy()); + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + // p != 0 -> isProb = true + // p == 0 -> isProb = false + // p != q -> isProb = true + // p == q -> isProb = false; + bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE; + if (!isProb) + std::swap(Taken, NonTaken); + + incEdgeWeight(BB, Taken); + decEdgeWeight(BB, NonTaken); +} + +// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges +// as taken, exiting edges as not-taken. 
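// For the common latch block with one back edge and one exiting edge this
// assigns LBH_TAKEN_WEIGHT (128) to the back edge and LBH_NONTAKEN_WEIGHT (4)
// to the exit, matching the 128 / (128 + 4) example in the class comment
// above.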
+void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { + uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); + + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + SmallVector<BasicBlock *, 8> BackEdges; + SmallVector<BasicBlock *, 8> ExitingEdges; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + Loop *SuccL = LI->getLoopFor(Succ); + if (SuccL != L) + ExitingEdges.push_back(Succ); + else if (Succ == L->getHeader()) + BackEdges.push_back(Succ); + } + + if (uint32_t numBackEdges = BackEdges.size()) { + uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; + if (backWeight < NORMAL_WEIGHT) + backWeight = NORMAL_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + EE = BackEdges.end(); EI != EE; ++EI) { + BasicBlock *Back = *EI; + BP->setEdgeWeight(BB, Back, backWeight); + } + } + + uint32_t numExitingEdges = ExitingEdges.size(); + if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (exitWeight < MIN_WEIGHT) + exitWeight = MIN_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + EE = ExitingEdges.end(); EI != EE; ++EI) { + BasicBlock *Exiting = *EI; + BP->setEdgeWeight(BB, Exiting, exitWeight); + } + } +} + +bool BranchProbabilityAnalysis::runOnFunction(Function &F) { + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Only LBH uses setEdgeWeight method. + calcLoopBranchHeuristics(BB); + + // PH and RH use only incEdgeWeight and decEwdgeWeight methods to + // not efface LBH results. + calcPointerHeuristics(BB); + calcReturnHeuristics(BB); + } + + return false; +} + +void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfo::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis<LoopInfo>(); + BranchProbabilityAnalysis BPA(&Weights, this, &LI); + return BPA.runOnFunction(F); +} + +uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { + uint32_t Sum = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability. + return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + BasicBlock *MaxSucc = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. 
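// Downstream queries turn these raw weights into probabilities:
// getEdgeProbability(Src, Dst) returns weight / sum-of-successor-weights and
// isEdgeHot asks for that ratio to exceed 4/5. As a sketch, a latch with
// weights 128 (back edge) and 4 (exit) gives a back-edge probability of
// 128/132 and is reported hot, since 128 * 5 > 132 * 4.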
+uint32_t +BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { + Edge E(Src, Dst); + DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); + + if (I != Weights.end()) + return I->second; + + return DEFAULT_WEIGHT; +} + +void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, + uint32_t Weight) { + Weights[std::make_pair(Src, Dst)] = Weight; + DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " + << Dst->getNameStr() << " weight to " << Weight + << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); +} + + +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const { + + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream & +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, + BasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp new file mode 100644 index 0000000..7bb063f --- /dev/null +++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp @@ -0,0 +1,165 @@ +//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a '-dot-cfg' analysis pass, which emits the +// cfg.<fnname>.dot file for each function in the program, with a graph of the +// CFG for that function. +// +// The other main feature of this file is that it implements the +// Function::viewCFG method, which is useful for debugging passes which operate +// on the CFG. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFGPrinter.h" + +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct CFGViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFG(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGViewer::ID = 0; +INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true) + +namespace { + struct CFGOnlyViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGOnlyViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFGOnly(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyViewer::ID = 0; +INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", + "View CFG of function (with no function bodies)", false, true) + +namespace { + struct CFGPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGPrinter() : FunctionPass(ID) { + initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." + F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGPrinter::ID = 0; +INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", + false, true) + +namespace { + struct CFGOnlyPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGOnlyPrinter() : FunctionPass(ID) { + initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." + F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F, true); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyPrinter::ID = 0; +INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", + "Print CFG of function to 'dot' file (with no function bodies)", + false, true) + +/// viewCFG - This function is meant for use from the debugger. You can just +/// say 'call F->viewCFG()' and a ghostview window should pop up from the +/// program, displaying the CFG of the current function. This depends on there +/// being a 'dot' and 'gv' program in your path. 
+/// +void Function::viewCFG() const { + ViewGraph(this, "cfg" + getNameStr()); +} + +/// viewCFGOnly - This function is meant for use from the debugger. It works +/// just like viewCFG, but it does not include the contents of basic blocks +/// into the nodes, just the label. If you are only interested in the CFG t +/// his can make the graph smaller. +/// +void Function::viewCFGOnly() const { + ViewGraph(this, "cfg" + getNameStr(), true); +} + +FunctionPass *llvm::createCFGPrinterPass () { + return new CFGPrinter(); +} + +FunctionPass *llvm::createCFGOnlyPrinterPass () { + return new CFGOnlyPrinter(); +} + diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp new file mode 100644 index 0000000..b2c27d1 --- /dev/null +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -0,0 +1,148 @@ +//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help determine which pointers are captured. +// A pointer value is captured if the function makes a copy of any part of the +// pointer that outlives the call. Not being captured means, more or less, that +// the pointer is only dereferenced and not stored in a global. Returning part +// of the pointer as the function return value may or may not count as capturing +// the pointer, depending on the context. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CallSite.h" +using namespace llvm; + +/// As its comment mentions, PointerMayBeCaptured can be expensive. +/// However, it's not easy for BasicAA to cache the result, because +/// it's an ImmutablePass. To work around this, bound queries at a +/// fixed number of uses. +/// +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +/// PointerMayBeCaptured - Return true if this pointer value may be captured +/// by the enclosing function (which is required to exist). This routine can +/// be expensive, so consider caching the results. The boolean ReturnCaptures +/// specifies whether returning the value (or part of it) from the function +/// counts as capturing it or not. The boolean StoreCaptures specified whether +/// storing the value (or part of it) into memory anywhere automatically +/// counts as capturing it or not. +bool llvm::PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, bool StoreCaptures) { + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); + SmallVector<Use*, Threshold> Worklist; + SmallSet<Use*, Threshold> Visited; + int Count = 0; + + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. 
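    // (For illustration: with the Threshold of 20 defined above, a value with
    // more than 20 direct uses is conservatively reported as captured right
    // here, before the worklist below is ever processed.)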
+ if (Count++ >= Threshold) + return true; + + Use *U = &UI.getUse(); + Visited.insert(U); + Worklist.push_back(U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + V = U->get(); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). + if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) + break; + + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) + // The parameter is not marked 'nocapture' - captured. + return true; + // Only passed via 'nocapture' arguments, or is the called function - not + // captured. + break; + } + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; + case Instruction::Ret: + if (ReturnCaptures) + return true; + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - conservatively assume it may be captured. + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + return true; + // Storing to the pointee does not cause the pointer to be captured. + break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // The original value is not captured via this if the new value isn't. + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + if (Visited.insert(U)) + Worklist.push_back(U); + } + break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (isNoAliasCall(V->stripPointerCasts())) + if (ConstantPointerNull *CPN = + dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + break; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + return true; + default: + // Something else - be conservative and say it is captured. + return true; + } + } + + // All uses examined - not captured. 
+ return false; +} diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp new file mode 100644 index 0000000..7fca17e --- /dev/null +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -0,0 +1,1412 @@ +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/FEnv.h" +#include <cerrno> +#include <cmath> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Constant Folding internal helper functions +//===----------------------------------------------------------------------===// + +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// TargetData. This always returns a non-null constant, but it may be a +/// ConstantExpr if unfoldable. +static Constant *FoldBitCast(Constant *C, const Type *DestTy, + const TargetData &TD) { + + // This only handles casts to vectors currently. + const VectorType *DestVTy = dyn_cast<VectorType>(DestTy); + if (DestVTy == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> + // vector so the code below can handle it uniformly. + if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) { + Constant *Ops = C; // don't take the address of C! + return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); + } + + // If this is a bitcast from constant vector -> vector, fold it. + ConstantVector *CV = dyn_cast<ConstantVector>(C); + if (CV == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If the element types match, VMCore can fold it. + unsigned NumDstElt = DestVTy->getNumElements(); + unsigned NumSrcElt = CV->getNumOperands(); + if (NumDstElt == NumSrcElt) + return ConstantExpr::getBitCast(C, DestTy); + + const Type *SrcEltTy = CV->getType()->getElementType(); + const Type *DstEltTy = DestVTy->getElementType(); + + // Otherwise, we're changing the number of elements in a vector, which + // requires endianness information to do the right thing. For example, + // bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + // folds to (little endian): + // <4 x i32> <i32 0, i32 0, i32 1, i32 0> + // and to (big endian): + // <4 x i32> <i32 0, i32 0, i32 0, i32 1> + + // First thing is first. 
We only want to think about integer here, so if + // we have something in FP form, recast it as integer. + if (DstEltTy->isFloatingPointTy()) { + // Fold to an vector of integers with same size as our FP type. + unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); + const Type *DestIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); + // Recursively handle this integer conversion, if possible. + C = FoldBitCast(C, DestIVTy, TD); + if (!C) return ConstantExpr::getBitCast(C, DestTy); + + // Finally, VMCore can handle this now that #elts line up. + return ConstantExpr::getBitCast(C, DestTy); + } + + // Okay, we know the destination is integer, if the input is FP, convert + // it to integer first. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + const Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); + // Ask VMCore to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + CV = dyn_cast<ConstantVector>(C); + if (!CV) // If VMCore wasn't able to fold it, bail out. + return C; + } + + // Now we know that the input and output vectors are both integer vectors + // of the same size, and that their #elements is not the same. Do the + // conversion here, which depends on whether the input or output has + // more elements. + bool isLittleEndian = TD.isLittleEndian(); + + SmallVector<Constant*, 32> Result; + if (NumDstElt < NumSrcElt) { + // Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>) + Constant *Zero = Constant::getNullValue(DstEltTy); + unsigned Ratio = NumSrcElt/NumDstElt; + unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits(); + unsigned SrcElt = 0; + for (unsigned i = 0; i != NumDstElt; ++i) { + // Build each element of the result. + Constant *Elt = Zero; + unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + // Zero extend the element to the right size. + Src = ConstantExpr::getZExt(Src, Elt->getType()); + + // Shift it to the right place, depending on endianness. + Src = ConstantExpr::getShl(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + + // Mix it in. + Elt = ConstantExpr::getOr(Elt, Src); + } + Result.push_back(Elt); + } + } else { + // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + + // Truncate and remember this piece. 
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); + } + } + } + + return ConstantVector::get(Result); +} + + +/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset +/// from a global, return the global and the constant. Because of +/// constantexprs, this function is recursive. +static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, + int64_t &Offset, const TargetData &TD) { + // Trivial case, constant is the global. + if ((GV = dyn_cast<GlobalValue>(C))) { + Offset = 0; + return true; + } + + // Otherwise, if this isn't a constant expr, bail out. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return false; + + // Look through ptr->int and ptr->ptr casts. + if (CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::BitCast) + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + if (CE->getOpcode() == Instruction::GetElementPtr) { + // Cannot compute this if the element type of the pointer is missing size + // info. + if (!cast<PointerType>(CE->getOperand(0)->getType()) + ->getElementType()->isSized()) + return false; + + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. + gep_type_iterator GTI = gep_type_begin(CE); + for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); + i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast<ConstantInt>(*i); + if (!CI) return false; // Index isn't a simple constant? + if (CI->isZero()) continue; // Not adding anything. + + if (const StructType *ST = dyn_cast<StructType>(*GTI)) { + // N = N + Offset + Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); + } else { + const SequentialType *SQT = cast<SequentialType>(*GTI); + Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); + } + } + return true; + } + + return false; +} + +/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the +/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the +/// pointer to copy results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. TD is the target data. +static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, + unsigned char *CurPtr, unsigned BytesLeft, + const TargetData &TD) { + assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + "Out of range access"); + + // If this element is zero or undefined, we can just return since *CurPtr is + // zero initialized. 
+ if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) + return true; + + if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { + if (CI->getBitWidth() > 64 || + (CI->getBitWidth() & 7) != 0) + return false; + + uint64_t Val = CI->getZExtValue(); + unsigned IntBytes = unsigned(CI->getBitWidth()/8); + + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { + CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + ++ByteOffset; + } + return true; + } + + if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + if (CFP->getType()->isDoubleTy()) { + C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isFloatTy()){ + C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + return false; + } + + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + const StructLayout *SL = TD.getStructLayout(CS->getType()); + unsigned Index = SL->getElementContainingOffset(ByteOffset); + uint64_t CurEltOffset = SL->getElementOffset(Index); + ByteOffset -= CurEltOffset; + + while (1) { + // If the element access is to the element itself and not to tail padding, + // read the bytes from the element. + uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + + if (ByteOffset < EltSize && + !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, + BytesLeft, TD)) + return false; + + ++Index; + + // Check to see if we read from the last struct element, if so we're done. + if (Index == CS->getType()->getNumElements()) + return true; + + // If we read all of the bytes we needed from this element we're done. + uint64_t NextEltOffset = SL->getElementOffset(Index); + + if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + return true; + + // Move to the next element of the struct. + CurPtr += NextEltOffset-CurEltOffset-ByteOffset; + BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + ByteOffset = 0; + CurEltOffset = NextEltOffset; + } + // not reached. + } + + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CA->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CV->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + if (CE->getOpcode() == Instruction::IntToPtr && + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); + } + + // Otherwise, unknown initializer type. 
+ return false; +} + +static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, + const TargetData &TD) { + const Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + + // If this isn't an integer load we can't fold it directly. + if (!IntType) { + // If this is a float/double load, we can try folding it as an int32/64 load + // and then bitcast the result. This can be useful for union cases. Note + // that address spaces don't matter here since we're not going to result in + // an actual new load. + const Type *MapTy; + if (LoadTy->isFloatTy()) + MapTy = Type::getInt32PtrTy(C->getContext()); + else if (LoadTy->isDoubleTy()) + MapTy = Type::getInt64PtrTy(C->getContext()); + else if (LoadTy->isVectorTy()) { + MapTy = IntegerType::get(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy)); + MapTy = PointerType::getUnqual(MapTy); + } else + return 0; + + C = FoldBitCast(C, MapTy, TD); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) + return FoldBitCast(Res, LoadTy, TD); + return 0; + } + + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; + if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + + GlobalValue *GVal; + int64_t Offset; + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + return 0; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + !GV->getInitializer()->getType()->isSized()) + return 0; + + // If we're loading off the beginning of the global, some bytes may be valid, + // but we don't try to handle this. + if (Offset < 0) return 0; + + // If we're not accessing anything in this constant, the result is undefined. + if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + return UndefValue::get(IntType); + + unsigned char RawBytes[32] = {0}; + if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + BytesLoaded, TD)) + return 0; + + APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + + return ConstantInt::get(IntType->getContext(), ResultVal); +} + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. +Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, + const TargetData *TD) { + // First, try the easy cases: + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + + // If the loaded value isn't a constant expr, we can't handle it. + ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) return 0; + + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) + return V; + } + + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. 
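  // (Illustrative case, little-endian target: an i32 load through a pointer
  // to the constant string "abc\0" folds to the immediate 0x00636261, i.e.
  // the bytes 'a', 'b', 'c', '\0' reassembled as one 32-bit integer.)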
+ std::string Str; + if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { + unsigned StrLen = Str.length(); + const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); + unsigned NumBits = Ty->getPrimitiveSizeInBits(); + // Replace load with immediate integer if the result is an integer or fp + // value. + if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 && + (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) { + APInt StrVal(NumBits, 0); + APInt SingleChar(NumBits, 0); + if (TD->isLittleEndian()) { + for (signed i = StrLen-1; i >= 0; i--) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + } else { + for (unsigned i = 0; i < StrLen; i++) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. + SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + } + + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); + if (Ty->isFloatingPointTy()) + Res = ConstantExpr::getBitCast(Res, Ty); + return Res; + } + } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = + dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + const Type *ResTy = cast<PointerType>(C->getType())->getElementType(); + if (GV->getInitializer()->isNullValue()) + return Constant::getNullValue(ResTy); + if (isa<UndefValue>(GV->getInitializer())) + return UndefValue::get(ResTy); + } + } + + // Try hard to fold loads from bitcasted strange and non-type-safe things. We + // currently don't do any of this for big endian systems. It can be + // generalized in the future if someone is interested. + if (TD && TD->isLittleEndian()) + return FoldReinterpretLoadFromConstPtr(CE, *TD); + return 0; +} + +static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ + if (LI->isVolatile()) return 0; + + if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) + return ConstantFoldLoadFromConstPtr(C, TD); + + return 0; +} + +/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. +/// Attempt to symbolically evaluate the result of a binary operator merging +/// these together. If target data info is available, it is provided as TD, +/// otherwise TD is null. +static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, + Constant *Op1, const TargetData *TD){ + // SROA + + // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. + // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute + // bits. + + + // If the constant expr is something like &A[123] - &A[4].f, fold this into a + // constant. This happens frequently when iterating over a global array. + if (Opc == Instruction::Sub && TD) { + GlobalValue *GV1, *GV2; + int64_t Offs1, Offs2; + + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + GV1 == GV2) { + // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. + return ConstantInt::get(Op0->getType(), Offs1-Offs2); + } + } + + return 0; +} + +/// CastGEPIndices - If array indices are not pointer-sized integers, +/// explicitly cast them so that they aren't implicitly casted by the +/// getelementptr. 
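// (For example, on a target with 64-bit pointers, a constant expression such
// as "getelementptr [4 x i32]* @g, i16 0, i16 2" has its i16 array indices
// rewritten to i64 here before folding; indices that select struct fields are
// left untouched.)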
+static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, + const Type *ResultTy, + const TargetData *TD) { + if (!TD) return 0; + const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + + bool Any = false; + SmallVector<Constant*, 32> NewIdxs; + for (unsigned i = 1; i != NumOps; ++i) { + if ((i == 1 || + !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(), + reinterpret_cast<Value *const *>(Ops+1), + i-1))) && + Ops[i]->getType() != IntPtrTy) { + Any = true; + NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], + true, + IntPtrTy, + true), + Ops[i], IntPtrTy)); + } else + NewIdxs.push_back(Ops[i]); + } + if (!Any) return 0; + + Constant *C = + ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size()); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + return C; +} + +/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP +/// constant expression, do so. +static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, + const Type *ResultTy, + const TargetData *TD) { + Constant *Ptr = Ops[0]; + if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) + return 0; + + const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + + // If this is a constant expr gep that is effectively computing an + // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' + for (unsigned i = 1; i != NumOps; ++i) + if (!isa<ConstantInt>(Ops[i])) { + + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (NumOps == 2 && + cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); + assert((CE == 0 || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResultTy); + if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res)) + Res = ConstantFoldConstantExpression(ResCE, TD); + return Res; + } + } + return 0; + } + + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); + APInt Offset = APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)Ops+1, NumOps-1)); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); + + // If this is a GEP of a GEP, fold it all into a single GEP. + while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { + SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end()); + + // Do not try the incorporate the sub-GEP if some index is not a number. + bool AllConstantInt = true; + for (unsigned i = 0, e = NestedOps.size(); i != e; ++i) + if (!isa<ConstantInt>(NestedOps[i])) { + AllConstantInt = false; + break; + } + if (!AllConstantInt) + break; + + Ptr = cast<Constant>(GEP->getOperand(0)); + Offset += APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)NestedOps.data(), + NestedOps.size())); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); + } + + // If the base value for this address is a literal integer value, fold the + // getelementptr to the resulting integer value casted to the pointer type. 
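  // (E.g. "getelementptr (i8* inttoptr (i64 1000 to i8*), i64 24)" folds to
  // "inttoptr (i64 1024 to i8*)" here: the base is the literal address 1000
  // and the accumulated byte offset is 24.)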
+ APInt BasePtr(BitWidth, 0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) + BasePtr = Base->getValue().zextOrTrunc(BitWidth); + if (Ptr->isNullValue() || BasePtr != 0) { + Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); + return ConstantExpr::getIntToPtr(C, ResultTy); + } + + // Otherwise form a regular getelementptr. Recompute the indices so that + // we eliminate over-indexing of the notional static type array bounds. + // This makes it easy to determine if the getelementptr is "inbounds". + // Also, this helps GlobalOpt do SROA on GlobalVariables. + const Type *Ty = Ptr->getType(); + SmallVector<Constant*, 32> NewIdxs; + do { + if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + if (ATy->isPointerTy()) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + if (ElemSize == 0) + // The element size is 0. This may be [0 x Ty]*, so just use a zero + // index for this level and proceed to the next level to see if it can + // accommodate the offset. + NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + else { + // The element size is non-zero divide the offset by the element + // size (rounding down), to compute the index at this level. + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + } + Ty = ATy->getElementType(); + } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + // Determine which field of the struct the offset points into. The + // getZExtValue is at least as safe as the StructLayout API because we + // know the offset is within the struct at this point. + const StructLayout &SL = *TD->getStructLayout(STy); + unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. + break; + } + } while (Ty != cast<PointerType>(ResultTy)->getElementType()); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = + ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + assert(cast<PointerType>(C->getType())->getElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. + if (Ty != cast<PointerType>(ResultTy)->getElementType()) + C = FoldBitCast(C, ResultTy, *TD); + + return C; +} + + + +//===----------------------------------------------------------------------===// +// Constant Folding public APIs +//===----------------------------------------------------------------------===// + +/// ConstantFoldInstruction - Try to constant fold the specified instruction. 
+/// If successful, the constant result is returned, if not, null is returned. +/// Note that this fails if not all of the operands are constant. Otherwise, +/// this function can only fail when attempting to fold instructions like loads +/// and stores, which have no constant expression form. +Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { + // Handle PHI nodes quickly here... + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Constant *CommonValue = 0; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is undef then skip it. Note that while we could + // skip the value if it is equal to the phi node itself we choose not to + // because that would break the rule that constant folding only applies if + // all operands are constants. + if (isa<UndefValue>(Incoming)) + continue; + // If the incoming value is not a constant, or is a different constant to + // the one we saw previously, then give up. + Constant *C = dyn_cast<Constant>(Incoming); + if (!C || (CommonValue && C != CommonValue)) + return 0; + CommonValue = C; + } + + // If we reach here, all incoming values are the same constant or undef. + return CommonValue ? CommonValue : UndefValue::get(PN->getType()); + } + + // Scan the operand list, checking to see if they are all constants, if so, + // hand off to ConstantFoldInstOperands. + SmallVector<Constant*, 8> Ops; + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (Constant *Op = dyn_cast<Constant>(*i)) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); + + if (const LoadInst *LI = dyn_cast<LoadInst>(I)) + return ConstantFoldLoadInst(LI, TD); + + if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) + return ConstantExpr::getInsertValue( + cast<Constant>(IVI->getAggregateOperand()), + cast<Constant>(IVI->getInsertedValueOperand()), + IVI->getIndices()); + + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) + return ConstantExpr::getExtractValue( + cast<Constant>(EVI->getAggregateOperand()), + EVI->getIndices()); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + Ops.data(), Ops.size(), TD); +} + +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified TargetData. If successful, the constant result is +/// result is returned, if not, null is returned. +Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const TargetData *TD) { + SmallVector<Constant*, 8> Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); + i != e; ++i) { + Constant *NewC = cast<Constant>(*i); + // Recursively fold the ConstantExpr's operands. + if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) + NewC = ConstantFoldConstantExpression(NewCE, TD); + Ops.push_back(NewC); + } + + if (CE->isCompare()) + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), + Ops.data(), Ops.size(), TD); +} + +/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// specified opcode and operands. If successful, the constant result is +/// returned, if not, null is returned. 
Note that this function can fail when +/// attempting to fold instructions like loads and stores, which have no +/// constant expression form. +/// +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc +/// information, due to only being passed an opcode and operands. Constant +/// folding using this function strips this information. +/// +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, + Constant* const* Ops, unsigned NumOps, + const TargetData *TD) { + // Handle easy binops first. + if (Instruction::isBinaryOp(Opcode)) { + if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) + return C; + + return ConstantExpr::get(Opcode, Ops[0], Ops[1]); + } + + switch (Opcode) { + default: return 0; + case Instruction::ICmp: + case Instruction::FCmp: assert(0 && "Invalid for compares"); + case Instruction::Call: + if (Function *F = dyn_cast<Function>(Ops[NumOps - 1])) + if (canConstantFoldCallTo(F)) + return ConstantFoldCall(F, Ops, NumOps - 1); + return 0; + case Instruction::PtrToInt: + // If the input is a inttoptr, eliminate the pair. This requires knowing + // the width of a pointer, so it can't be done in ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::IntToPtr) { + Constant *Input = CE->getOperand(0); + unsigned InWidth = Input->getType()->getScalarSizeInBits(); + if (TD->getPointerSizeInBits() < InWidth) { + Constant *Mask = + ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, + TD->getPointerSizeInBits())); + Input = ConstantExpr::getAnd(Input, Mask); + } + // Do a zext or trunc to get to the dest size. + return ConstantExpr::getIntegerCast(Input, DestTy, false); + } + } + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::IntToPtr: + // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if + // the int size is >= the ptr size. This requires knowing the width of a + // pointer, so it can't be done in ConstantExpr::getCast. 
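    // (E.g. with 32-bit pointers, "inttoptr (i64 ptrtoint (i8* @g to i64) to
    // i32*)" becomes "bitcast (i8* @g to i32*)", since the intermediate i64
    // cannot have dropped any address bits.)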
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) + if (TD && + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::BitCast: + if (TD) + return FoldBitCast(Ops[0], DestTy, *TD); + return ConstantExpr::getBitCast(Ops[0], DestTy); + case Instruction::Select: + return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ExtractElement: + return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::InsertElement: + return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::ShuffleVector: + return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); + case Instruction::GetElementPtr: + if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD)) + return C; + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + return C; + + return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); + } +} + +/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// instruction (icmp/fcmp) with the specified operands. If it fails, it +/// returns a constant expression of the specified operands. +/// +Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, + Constant *Ops0, Constant *Ops1, + const TargetData *TD) { + // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y + // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y + // + // ConstantExpr::getCompare cannot do this, because it doesn't have TD + // around to know if bit truncation is happening. + if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { + if (TD && Ops1->isNullValue()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if (CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + } + + if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { + if (TD && CE0->getOpcode() == CE1->getOpcode()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. 
+ Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), + IntPtrTy, false); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if ((CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) + return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), + CE1->getOperand(0), TD); + } + } + + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) + // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) + if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && + CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { + Constant *LHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + Constant *RHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + unsigned OpC = + Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + Constant *Ops[] = { LHS, RHS }; + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + } + } + + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); +} + + +/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a +/// getelementptr constantexpr, return the constant value being addressed by the +/// constant expression, or null if something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, + ConstantExpr *CE) { + if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + return 0; // Do not allow stepping over the value! + + // Loop over all of the operands, tracking down which value we are + // addressing... 
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); + for (++I; I != E; ++I) + if (const StructType *STy = dyn_cast<StructType>(*I)) { + ConstantInt *CU = cast<ConstantInt>(I.getOperand()); + assert(CU->getZExtValue() < STy->getNumElements() && + "Struct index out of range!"); + unsigned El = (unsigned)CU->getZExtValue(); + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + C = CS->getOperand(El); + } else if (isa<ConstantAggregateZero>(C)) { + C = Constant::getNullValue(STy->getElementType(El)); + } else if (isa<UndefValue>(C)) { + C = UndefValue::get(STy->getElementType(El)); + } else { + return 0; + } + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) { + if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) { + if (CI->getZExtValue() >= ATy->getNumElements()) + return 0; + if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) + C = CA->getOperand(CI->getZExtValue()); + else if (isa<ConstantAggregateZero>(C)) + C = Constant::getNullValue(ATy->getElementType()); + else if (isa<UndefValue>(C)) + C = UndefValue::get(ATy->getElementType()); + else + return 0; + } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) { + if (CI->getZExtValue() >= VTy->getNumElements()) + return 0; + if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) + C = CP->getOperand(CI->getZExtValue()); + else if (isa<ConstantAggregateZero>(C)) + C = Constant::getNullValue(VTy->getElementType()); + else if (isa<UndefValue>(C)) + C = UndefValue::get(VTy->getElementType()); + else + return 0; + } else { + return 0; + } + } else { + return 0; + } + return C; +} + + +//===----------------------------------------------------------------------===// +// Constant Folding for Calls +// + +/// canConstantFoldCallTo - Return true if its even possible to fold a call to +/// the specified function. +bool +llvm::canConstantFoldCallTo(const Function *F) { + switch (F->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::powi: + case Intrinsic::bswap: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + return true; + default: + return false; + case 0: break; + } + + if (!F->hasName()) return false; + StringRef Name = F->getName(); + + // In these cases, the check of the length is required. We don't want to + // return true for a name like "cos\0blah" which strcmp would return equal to + // "cos", but has length 8. 
+ switch (Name[0]) { + default: return false; + case 'a': + return Name == "acos" || Name == "asin" || + Name == "atan" || Name == "atan2"; + case 'c': + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + case 'e': + return Name == "exp" || Name == "exp2"; + case 'f': + return Name == "fabs" || Name == "fmod" || Name == "floor"; + case 'l': + return Name == "log" || Name == "log10"; + case 'p': + return Name == "pow"; + case 's': + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; + case 't': + return Name == "tan" || Name == "tanh"; + } +} + +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, + const Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), + double V, double W, const Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V, W); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer +/// conversion of a constant floating point. If roundTowardZero is false, the +/// default IEEE rounding is used (toward nearest, ties to even). This matches +/// the behavior of the non-truncating SSE instructions in the default rounding +/// mode. The desired integer type Ty is used to select how many bits are +/// available for the result. Returns null if the conversion cannot be +/// performed, otherwise returns the Constant value resulting from the +/// conversion. +static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, + const Type *Ty) { + assert(Op && "Called with NULL operand"); + APFloat Val(Op->getValueAPF()); + + // All of these conversion intrinsics form an integer of at most 64bits. + unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth(); + assert(ResultWidth <= 64 && + "Can only constant fold conversions to 64 and 32 bit ints"); + + uint64_t UIntVal; + bool isExact = false; + APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero + : APFloat::rmNearestTiesToEven; + APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth, + /*isSigned=*/true, mode, + &isExact); + if (status != APFloat::opOK && status != APFloat::opInexact) + return 0; + return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); +} + +/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// with the specified arguments, returning null if unsuccessful. 
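// A minimal caller-side sketch (hypothetical names: Ctx is an LLVMContext,
// CosF is the Function* returned by getCalledFunction() for a libm "cos"
// declaration):
//
//   Constant *Arg = ConstantFP::get(Type::getDoubleTy(Ctx), 0.0);
//   Constant *Ops[] = { Arg };
//   Constant *Folded = ConstantFoldCall(CosF, Ops, 1);
//   // Folded is a ConstantFP holding 1.0, or null if the host libm raised a
//   // floating-point exception while evaluating cos(0.0).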
+Constant * +llvm::ConstantFoldCall(Function *F, + Constant *const *Operands, unsigned NumOperands) { + if (!F->hasName()) return 0; + StringRef Name = F->getName(); + + const Type *Ty = F->getReturnType(); + if (NumOperands == 1) { + if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { + APFloat Val(Op->getValueAPF()); + + bool lost = false; + Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + + return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + } + + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + + /// We only fold functions with finite arguments. Folding NaN and inf is + /// likely to be aborted with an exception anyway, and some host libms + /// have known errors raising exceptions. + if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) + return 0; + + /// Currently APFloat versions of these functions do not exist, so we use + /// the host native double versions. Float versions are not called + /// directly but for all these it is true (float)(f((double)arg)) == + /// f(arg). Long double not supported yet. + double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : + Op->getValueAPF().convertToDouble(); + switch (Name[0]) { + case 'a': + if (Name == "acos") + return ConstantFoldFP(acos, V, Ty); + else if (Name == "asin") + return ConstantFoldFP(asin, V, Ty); + else if (Name == "atan") + return ConstantFoldFP(atan, V, Ty); + break; + case 'c': + if (Name == "ceil") + return ConstantFoldFP(ceil, V, Ty); + else if (Name == "cos") + return ConstantFoldFP(cos, V, Ty); + else if (Name == "cosh") + return ConstantFoldFP(cosh, V, Ty); + else if (Name == "cosf") + return ConstantFoldFP(cos, V, Ty); + break; + case 'e': + if (Name == "exp") + return ConstantFoldFP(exp, V, Ty); + + if (Name == "exp2") { + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. 
+ return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + } + break; + case 'f': + if (Name == "fabs") + return ConstantFoldFP(fabs, V, Ty); + else if (Name == "floor") + return ConstantFoldFP(floor, V, Ty); + break; + case 'l': + if (Name == "log" && V > 0) + return ConstantFoldFP(log, V, Ty); + else if (Name == "log10" && V > 0) + return ConstantFoldFP(log10, V, Ty); + else if (F->getIntrinsicID() == Intrinsic::sqrt && + (Ty->isFloatTy() || Ty->isDoubleTy())) { + if (V >= -0.0) + return ConstantFoldFP(sqrt, V, Ty); + else // Undefined + return Constant::getNullValue(Ty); + } + break; + case 's': + if (Name == "sin") + return ConstantFoldFP(sin, V, Ty); + else if (Name == "sinh") + return ConstantFoldFP(sinh, V, Ty); + else if (Name == "sqrt" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sqrtf" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty); + else if (Name == "sinf") + return ConstantFoldFP(sin, V, Ty); + break; + case 't': + if (Name == "tan") + return ConstantFoldFP(tan, V, Ty); + else if (Name == "tanh") + return ConstantFoldFP(tanh, V, Ty); + break; + default: + break; + } + return 0; + } + + if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) { + switch (F->getIntrinsicID()) { + case Intrinsic::bswap: + return ConstantInt::get(F->getContext(), Op->getValue().byteSwap()); + case Intrinsic::ctpop: + return ConstantInt::get(Ty, Op->getValue().countPopulation()); + case Intrinsic::cttz: + return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); + case Intrinsic::ctlz: + return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + case Intrinsic::convert_from_fp16: { + APFloat Val(Op->getValue()); + + bool lost = false; + APFloat::opStatus status = + Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. + (void)status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(F->getContext(), Val); + } + default: + return 0; + } + } + + if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) + return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty); + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0))) + return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty); + } + } + + if (isa<UndefValue>(Operands[0])) { + if (F->getIntrinsicID() == Intrinsic::bswap) + return Operands[0]; + return 0; + } + + return 0; + } + + if (NumOperands == 2) { + if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + return 0; + double Op1V = Ty->isFloatTy() ? + (double)Op1->getValueAPF().convertToFloat() : + Op1->getValueAPF().convertToDouble(); + if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) { + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V = Ty->isFloatTy() ? 
+ (double)Op2->getValueAPF().convertToFloat(): + Op2->getValueAPF().convertToDouble(); + + if (Name == "pow") + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); + if (Name == "fmod") + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); + if (Name == "atan2") + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); + } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy()) + return ConstantFP::get(F->getContext(), + APFloat((double)std::pow((double)Op1V, + (int)Op2C->getZExtValue()))); + } + return 0; + } + + + if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: { + APInt Res; + bool Overflow; + switch (F->getIntrinsicID()) { + default: assert(0 && "Invalid case"); + case Intrinsic::sadd_with_overflow: + Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::uadd_with_overflow: + Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::ssub_with_overflow: + Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::usub_with_overflow: + Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::smul_with_overflow: + Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow); + break; + case Intrinsic::umul_with_overflow: + Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow); + break; + } + Constant *Ops[] = { + ConstantInt::get(F->getContext(), Res), + ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow) + }; + return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops); + } + } + } + + return 0; + } + return 0; + } + return 0; +} diff --git a/contrib/llvm/lib/Analysis/DIBuilder.cpp b/contrib/llvm/lib/Analysis/DIBuilder.cpp new file mode 100644 index 0000000..ac5eeeb --- /dev/null +++ b/contrib/llvm/lib/Analysis/DIBuilder.cpp @@ -0,0 +1,839 @@ +//===--- DIBuilder.cpp - Debug Information Builder ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DIBuilder. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Dwarf.h" + +using namespace llvm; +using namespace llvm::dwarf; + +static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { + assert((Tag & LLVMDebugVersionMask) == 0 && + "Tag too large for debug encoding!"); + return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion); +} + +DIBuilder::DIBuilder(Module &m) + : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} + +/// createCompileUnit - A CompileUnit provides an anchor for all debugging +/// information generated during this instance of compilation. +void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, + unsigned RunTimeVer) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), Lang), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + MDString::get(VMContext, Producer), + // Deprecate isMain field. + ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + MDString::get(VMContext, Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) + }; + TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + // Create a named metadata so that it is easier to find cu in a module. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(TheCU); +} + +/// createFile - Create a file descriptor to hold debugging information +/// for a file. +DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { + assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_file_type), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + TheCU + }; + return DIFile(MDNode::get(VMContext, Elts)); +} + +/// createEnumerator - Create a single enumerator value. +DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt64Ty(VMContext), Val) + }; + return DIEnumerator(MDNode::get(VMContext, Elts)); +} + +/// createBasicType - Create debugging information entry for a basic +/// type, e.g 'char'. +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Encoding) { + // Basic types are encoded in DIBasicType format. Line number, filename, + // offset and flags are always empty here. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_base_type), + TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createQaulifiedType - Create debugging information entry for a qualified +/// type, e.g. 'const int'. +DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { + // Qualified types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + TheCU, + MDString::get(VMContext, StringRef()), // Empty name. + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FromTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createPointerType - Create debugging information entry for a pointer. +DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits, StringRef Name) { + // Pointer types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), + TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + PointeeTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createReferenceType - Create debugging information entry for a reference. +DIType DIBuilder::createReferenceType(DIType RTy) { + // References are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), + TheCU, + NULL, // Name + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + RTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createTypedef - Create debugging information entry for a typedef. +DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo, DIDescriptor Context) { + // typedefs are encoded in DIDerivedType format. 
+ assert(Ty.Verify() && "Invalid typedef type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_typedef), + Context, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + Ty + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createFriend - Create debugging information entry for a 'friend'. +DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { + // typedefs are encoded in DIDerivedType format. + assert(Ty.Verify() && "Invalid type!"); + assert(FriendTy.Verify() && "Invalid friend type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_friend), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FriendTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createInheritance - Create debugging information entry to establish +/// inheritnace relationship between two types. +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, + uint64_t BaseOffset, unsigned Flags) { + // TAG_inheritance is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + BaseTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createMemberType - Create debugging information entry for a member. +DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, StringRef PropertyName, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + File, // Or TheCU ? Ty ? 
+ MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + MDString::get(VMContext, PropertyName), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createClassType - Create debugging information entry for a class. +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType DerivedFrom, DIArray Elements, + MDNode *VTableHoder, MDNode *TemplateParams) { + // TAG_class_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_class_type), + Context, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + VTableHoder, + TemplateParams + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createTemplateTypeParameter - Create debugging information for template +/// type parameter. +DITemplateTypeParameter +DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, + DIType Ty, MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), + Context, + MDString::get(VMContext, Name), + Ty, + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); +} + +/// createTemplateValueParameter - Create debugging information for template +/// value parameter. +DITemplateValueParameter +DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, + DIType Ty, uint64_t Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), + Context, + MDString::get(VMContext, Name), + Ty, + ConstantInt::get(Type::getInt64Ty(VMContext), Val), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateValueParameter(MDNode::get(VMContext, Elts)); +} + +/// createStructType - Create debugging information entry for a struct. +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + unsigned Flags, DIArray Elements, + unsigned RunTimeLang) { + // TAG_structure_type is encoded in DICompositeType format. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), + Context, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createUnionType - Create debugging information entry for an union. +DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, + DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang) { + // TAG_union_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_union_type), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createSubroutineType - Create subroutine type. +DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { + // TAG_subroutine_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), + File, + MDString::get(VMContext, ""), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ParameterTypes, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createEnumerationType - Create debugging information entry for an +/// enumeration. +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, DIArray Elements) { + // TAG_enumeration_type is encoded in DICompositeType format. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + MDNode *Node = MDNode::get(VMContext, Elts); + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); + NMD->addOperand(Node); + return DIType(Node); +} + +/// createArrayType - Create debugging information entry for an array. +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { + // TAG_array_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + TheCU, + MDString::get(VMContext, ""), + TheCU, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), Size), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Ty, + Subscripts, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createVectorType - Create debugging information entry for a vector. +DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { + // TAG_vector_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_vector_type), + TheCU, + MDString::get(VMContext, ""), + TheCU, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), Size), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Ty, + Subscripts, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createArtificialType - Create a new DIType with "artificial" flag set. +DIType DIBuilder::createArtificialType(DIType Ty) { + if (Ty.isArtificial()) + return Ty; + + SmallVector<Value *, 9> Elts; + MDNode *N = Ty; + assert (N && "Unexpected input DIType!"); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (Value *V = N->getOperand(i)) + Elts.push_back(V); + else + Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); + } + + unsigned CurFlags = Ty.getFlags(); + CurFlags = CurFlags | DIType::FlagArtificial; + + // Flags are stored at this slot. + Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); + + return DIType(MDNode::get(VMContext, Elts)); +} + +/// retainType - Retain DIType in a module even if it is not referenced +/// through debug info anchors. +void DIBuilder::retainType(DIType T) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); + NMD->addOperand(T); +} + +/// createUnspecifiedParameter - Create unspeicified type descriptor +/// for the subroutine type. 
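As a usage sketch for the builder methods in this file, a front end describing the vararg signature int printf(const char *, ...) could combine createBasicType, createPointerType, createUnspecifiedParameter and createSubroutineType as below; the file name, sizes and the helper emitPrintfDebugType are assumptions, not taken from the imported source:

#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Module.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

// Sketch (assumed names): build the debug type of  int printf(const char*, ...)
static void emitPrintfDebugType(Module &M) {
  DIBuilder DIB(M);
  DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "example fe",
                        /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0);
  DIFile File = DIB.createFile("t.c", "/tmp");
  DIType IntTy  = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
  DIType CharTy = DIB.createBasicType("char", 8, 8, dwarf::DW_ATE_signed_char);
  DIType CharPtrTy = DIB.createPointerType(CharTy, 64, 64, "char *");
  Value *ParamTys[] = { IntTy,                       // return type comes first
                        CharPtrTy,
                        DIB.createUnspecifiedParameter() };
  DIType PrintfTy =
      DIB.createSubroutineType(File, DIB.getOrCreateArray(ParamTys));
  (void)PrintfTy;                                    // Consumed by createFunction.
}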
+DIDescriptor DIBuilder::createUnspecifiedParameter() { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) + }; + return DIDescriptor(MDNode::get(VMContext, Elts)); +} + +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType() { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. + Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts); + return DIType(Node); +} + +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType(DIFile F) { + // Give the temporary MDNode a tag. It doesn't matter what tag we + // use here as long as DIType accepts it. + Value *Elts[] = { + GetTagConstant(VMContext, DW_TAG_base_type), + F.getCompileUnit(), + NULL, + F + }; + MDNode *Node = MDNode::getTemporary(VMContext, Elts); + return DIType(Node); +} + +/// getOrCreateArray - Get a DIArray, create one if required. +DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) { + if (Elements.empty()) { + Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); + return DIArray(MDNode::get(VMContext, Null)); + } + return DIArray(MDNode::get(VMContext, Elements)); +} + +/// getOrCreateSubrange - Create a descriptor for a value range. This +/// implicitly uniques the values returned. +DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type), + ConstantInt::get(Type::getInt64Ty(VMContext), Lo), + ConstantInt::get(Type::getInt64Ty(VMContext), Hi) + }; + + return DISubrange(MDNode::get(VMContext, Elts)); +} + +/// createGlobalVariable - Create a new descriptor for the specified global. +DIGlobalVariable DIBuilder:: +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, llvm::Value *Val) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_variable), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + TheCU, + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + Ty, + ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ + Val + }; + MDNode *Node = MDNode::get(VMContext, Elts); + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(Node); + return DIGlobalVariable(Node); +} + +/// createStaticVariable - Create a new descriptor for the specified static +/// variable. 
+DIGlobalVariable DIBuilder:: +createStaticVariable(DIDescriptor Context, StringRef Name, + StringRef LinkageName, DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, llvm::Value *Val) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_variable), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context, + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + Ty, + ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ + Val + }; + MDNode *Node = MDNode::get(VMContext, Elts); + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(Node); + return DIGlobalVariable(Node); +} + +/// createVariable - Create a new descriptor for the specified variable. +DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, + StringRef Name, DIFile File, + unsigned LineNo, DIType Ty, + bool AlwaysPreserve, unsigned Flags, + unsigned ArgNo) { + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))), + Ty, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags) + }; + MDNode *Node = MDNode::get(VMContext, Elts); + if (AlwaysPreserve) { + // The optimizer may remove local variable. If there is an interest + // to preserve variable info in such situation then stash it in a + // named mdnode. + DISubprogram Fn(getDISubprogram(Scope)); + StringRef FName = "fn"; + if (Fn.getFunction()) + FName = Fn.getFunction()->getName(); + char One = '\1'; + if (FName.startswith(StringRef(&One, 1))) + FName = FName.substr(1); + NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName); + FnLocals->addOperand(Node); + } + return DIVariable(Node); +} + +/// createComplexVariable - Create a new descriptor for the specified variable +/// which has a complex address expression for its address. +DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, + StringRef Name, DIFile F, + unsigned LineNo, + DIType Ty, ArrayRef<Value *> Addr, + unsigned ArgNo) { + SmallVector<Value *, 15> Elts; + Elts.push_back(GetTagConstant(VMContext, Tag)); + Elts.push_back(Scope); + Elts.push_back(MDString::get(VMContext, Name)); + Elts.push_back(F); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); + Elts.push_back(Ty); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.append(Addr.begin(), Addr.end()); + + return DIVariable(MDNode::get(VMContext, Elts)); +} + +/// createFunction - Create a new descriptor for the specified function. 
+DISubprogram DIBuilder::createFunction(DIDescriptor Context, + StringRef Name, + StringRef LinkageName, + DIFile File, unsigned LineNo, + DIType Ty, + bool isLocalToUnit, bool isDefinition, + unsigned Flags, bool isOptimized, + Function *Fn, + MDNode *TParams, + MDNode *Decl) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context, + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Ty, + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + Fn, + TParams, + Decl + }; + MDNode *Node = MDNode::get(VMContext, Elts); + + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(Node); + return DISubprogram(Node); +} + +/// createMethod - Create a new descriptor for the specified C++ method. +DISubprogram DIBuilder::createMethod(DIDescriptor Context, + StringRef Name, + StringRef LinkageName, + DIFile F, + unsigned LineNo, DIType Ty, + bool isLocalToUnit, + bool isDefinition, + unsigned VK, unsigned VIndex, + MDNode *VTableHolder, + unsigned Flags, + bool isOptimized, + Function *Fn, + MDNode *TParam) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context, + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Ty, + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), + ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), + VTableHolder, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + Fn, + TParam, + }; + MDNode *Node = MDNode::get(VMContext, Elts); + + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(Node); + return DISubprogram(Node); +} + +/// createNameSpace - This creates new descriptor for a namespace +/// with the specified parent scope. +DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_namespace), + Scope, + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) + }; + return DINameSpace(MDNode::get(VMContext, Elts)); +} + +DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, + unsigned Line, unsigned Col) { + // Defeat MDNode uniqing for lexical blocks by using unique id. 
+ static unsigned int unique_id = 0; + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + Scope, + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + ConstantInt::get(Type::getInt32Ty(VMContext), Col), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) + }; + return DILexicalBlock(MDNode::get(VMContext, Elts)); +} + +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, + Instruction *InsertBefore) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; + return CallInst::Create(DeclareFn, Args, "", InsertBefore); +} + +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; + + // If this block already has a terminator then insert this intrinsic + // before the terminator. + if (TerminatorInst *T = InsertAtEnd->getTerminator()) + return CallInst::Create(DeclareFn, Args, "", T); + else + return CallInst::Create(DeclareFn, Args, "", InsertAtEnd); +} + +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + Instruction *InsertBefore) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), V), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, "", InsertBefore); +} + +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), V), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, "", InsertAtEnd); +} + diff --git a/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp new file mode 100644 index 0000000..b23c351 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DbgInfoPrinter.cpp @@ -0,0 +1,224 @@ +//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that prints instructions, and associated debug +// info: +// +// - source/line/col information +// - original variable name +// - original type name +// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> +PrintDirectory("print-fullpath", + cl::desc("Print fullpath when printing debug info"), + cl::Hidden); + +namespace { + class PrintDbgInfo : public FunctionPass { + raw_ostream &Out; + void printVariableDeclaration(const Value *V); + public: + static char ID; // Pass identification + PrintDbgInfo() : FunctionPass(ID), Out(errs()) { + initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; + char PrintDbgInfo::ID = 0; +} + +INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", + "Print debug info in human readable form", false, false) + +FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } + +/// Find the debug info descriptor corresponding to this global variable. +static Value *findDbgGlobalDeclare(GlobalVariable *V) { + const Module *M = V->getParent(); + NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); + if (!NMD) + return 0; + + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); + if (!DIG.isGlobalVariable()) + continue; + if (DIGlobalVariable(DIG).getGlobal() == V) + return DIG; + } + return 0; +} + +/// Find the debug info descriptor corresponding to this function. +static Value *findDbgSubprogramDeclare(Function *V) { + const Module *M = V->getParent(); + NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); + if (!NMD) + return 0; + + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); + if (!DIG.isSubprogram()) + continue; + if (DISubprogram(DIG).getFunction() == V) + return DIG; + } + return 0; +} + +/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. +/// It looks through pointer casts too. 
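For context, the intrinsic this helper looks for is normally emitted through DIBuilder::insertDeclare (see DIBuilder.cpp above); a minimal producer-side sketch, with Alloca, SP, File, IntTy and DIB assumed rather than taken from this source:

// Sketch (assumed names): declare a local "x" stored in AllocaInst *Alloca;
// the resulting llvm.dbg.declare call is what findDbgDeclare() walks a
// function to locate.
DIVariable Var =
    DIB.createLocalVariable(dwarf::DW_TAG_auto_variable, SP, "x", File,
                            /*LineNo=*/11, IntTy,
                            /*AlwaysPreserve=*/true, /*Flags=*/0, /*ArgNo=*/0);
DIB.insertDeclare(Alloca, Var, Alloca->getParent());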
+static const DbgDeclareInst *findDbgDeclare(const Value *V) { + V = V->stripPointerCasts(); + + if (!isa<Instruction>(V) && !isa<Argument>(V)) + return 0; + + const Function *F = NULL; + if (const Instruction *I = dyn_cast<Instruction>(V)) + F = I->getParent()->getParent(); + else if (const Argument *A = dyn_cast<Argument>(V)) + F = A->getParent(); + + for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) + for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); + BI != BE; ++BI) + if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + if (DDI->getAddress() == V) + return DDI; + + return 0; +} + +static bool getLocationInfo(const Value *V, std::string &DisplayName, + std::string &Type, unsigned &LineNo, + std::string &File, std::string &Dir) { + DICompileUnit Unit; + DIType TypeD; + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) { + Value *DIGV = findDbgGlobalDeclare(GV); + if (!DIGV) return false; + DIGlobalVariable Var(cast<MDNode>(DIGV)); + + StringRef D = Var.getDisplayName(); + if (!D.empty()) + DisplayName = D; + LineNo = Var.getLineNumber(); + Unit = Var.getCompileUnit(); + TypeD = Var.getType(); + } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){ + Value *DIF = findDbgSubprogramDeclare(F); + if (!DIF) return false; + DISubprogram Var(cast<MDNode>(DIF)); + + StringRef D = Var.getDisplayName(); + if (!D.empty()) + DisplayName = D; + LineNo = Var.getLineNumber(); + Unit = Var.getCompileUnit(); + TypeD = Var.getType(); + } else { + const DbgDeclareInst *DDI = findDbgDeclare(V); + if (!DDI) return false; + DIVariable Var(cast<MDNode>(DDI->getVariable())); + + StringRef D = Var.getName(); + if (!D.empty()) + DisplayName = D; + LineNo = Var.getLineNumber(); + Unit = Var.getCompileUnit(); + TypeD = Var.getType(); + } + + StringRef T = TypeD.getName(); + if (!T.empty()) + Type = T; + StringRef F = Unit.getFilename(); + if (!F.empty()) + File = F; + StringRef D = Unit.getDirectory(); + if (!D.empty()) + Dir = D; + return true; +} + +void PrintDbgInfo::printVariableDeclaration(const Value *V) { + std::string DisplayName, File, Directory, Type; + unsigned LineNo; + + if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) + return; + + Out << "; "; + WriteAsOperand(Out, V, false, 0); + if (isa<Function>(V)) + Out << " is function " << DisplayName + << " of type " << Type << " declared at "; + else + Out << " is variable " << DisplayName + << " of type " << Type << " declared at "; + + if (PrintDirectory) + Out << Directory << "/"; + + Out << File << ":" << LineNo << "\n"; +} + +bool PrintDbgInfo::runOnFunction(Function &F) { + if (F.isDeclaration()) + return false; + + Out << "function " << F.getName() << "\n\n"; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BasicBlock *BB = I; + + if (I != F.begin() && (pred_begin(BB) == pred_end(BB))) + // Skip dead blocks. 
+ continue; + + Out << BB->getName(); + Out << ":"; + + Out << "\n"; + + for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); + i != e; ++i) { + + printVariableDeclaration(i); + + if (const User *U = dyn_cast<User>(i)) { + for(unsigned i=0;i<U->getNumOperands();i++) + printVariableDeclaration(U->getOperand(i)); + } + } + } + return false; +} diff --git a/contrib/llvm/lib/Analysis/DebugInfo.cpp b/contrib/llvm/lib/Analysis/DebugInfo.cpp new file mode 100644 index 0000000..b42e946 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DebugInfo.cpp @@ -0,0 +1,948 @@ +//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the helper classes used to build and interpret debug +// information in LLVM IR form. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace llvm::dwarf; + +//===----------------------------------------------------------------------===// +// DIDescriptor +//===----------------------------------------------------------------------===// + +DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { +} + +DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { +} + +StringRef +DIDescriptor::getStringField(unsigned Elt) const { + if (DbgNode == 0) + return StringRef(); + + if (Elt < DbgNode->getNumOperands()) + if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt))) + return MDS->getString(); + + return StringRef(); +} + +uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt))) + return CI->getZExtValue(); + + return 0; +} + +DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { + if (DbgNode == 0) + return DIDescriptor(); + + if (Elt < DbgNode->getNumOperands()) + return + DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt))); + return DIDescriptor(); +} + +GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt)); + return 0; +} + +Constant *DIDescriptor::getConstantField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt)); + return 0; +} + +Function *DIDescriptor::getFunctionField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < 
DbgNode->getNumOperands()) + return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt)); + return 0; +} + +unsigned DIVariable::getNumAddrElements() const { + if (getVersion() <= llvm::LLVMDebugVersion8) + return DbgNode->getNumOperands()-6; + return DbgNode->getNumOperands()-7; +} + + +//===----------------------------------------------------------------------===// +// Predicates +//===----------------------------------------------------------------------===// + +/// isBasicType - Return true if the specified tag is legal for +/// DIBasicType. +bool DIDescriptor::isBasicType() const { + return DbgNode && getTag() == dwarf::DW_TAG_base_type; +} + +/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. +bool DIDescriptor::isDerivedType() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_pointer_type: + case dwarf::DW_TAG_reference_type: + case dwarf::DW_TAG_const_type: + case dwarf::DW_TAG_volatile_type: + case dwarf::DW_TAG_restrict_type: + case dwarf::DW_TAG_member: + case dwarf::DW_TAG_inheritance: + case dwarf::DW_TAG_friend: + return true; + default: + // CompositeTypes are currently modelled as DerivedTypes. + return isCompositeType(); + } +} + +/// isCompositeType - Return true if the specified tag is legal for +/// DICompositeType. +bool DIDescriptor::isCompositeType() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_array_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_subroutine_type: + case dwarf::DW_TAG_class_type: + return true; + default: + return false; + } +} + +/// isVariable - Return true if the specified tag is legal for DIVariable. +bool DIDescriptor::isVariable() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_auto_variable: + case dwarf::DW_TAG_arg_variable: + case dwarf::DW_TAG_return_variable: + return true; + default: + return false; + } +} + +/// isType - Return true if the specified tag is legal for DIType. +bool DIDescriptor::isType() const { + return isBasicType() || isCompositeType() || isDerivedType(); +} + +/// isSubprogram - Return true if the specified tag is legal for +/// DISubprogram. +bool DIDescriptor::isSubprogram() const { + return DbgNode && getTag() == dwarf::DW_TAG_subprogram; +} + +/// isGlobalVariable - Return true if the specified tag is legal for +/// DIGlobalVariable. +bool DIDescriptor::isGlobalVariable() const { + return DbgNode && (getTag() == dwarf::DW_TAG_variable || + getTag() == dwarf::DW_TAG_constant); +} + +/// isGlobal - Return true if the specified tag is legal for DIGlobal. +bool DIDescriptor::isGlobal() const { + return isGlobalVariable(); +} + +/// isUnspecifiedParmeter - Return true if the specified tag is +/// DW_TAG_unspecified_parameters. +bool DIDescriptor::isUnspecifiedParameter() const { + return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters; +} + +/// isScope - Return true if the specified tag is one of the scope +/// related tag. +bool DIDescriptor::isScope() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_compile_unit: + case dwarf::DW_TAG_lexical_block: + case dwarf::DW_TAG_subprogram: + case dwarf::DW_TAG_namespace: + return true; + default: + break; + } + return false; +} + +/// isTemplateTypeParameter - Return true if the specified tag is +/// DW_TAG_template_type_parameter. 
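The predicates defined above and below are how clients usually classify a raw debug-info node before wrapping it in a concrete DI* class; a small sketch, with N standing for a hypothetical MDNode* pulled from metadata:

// Sketch (assumed names): classify a debug metadata node N.
DIDescriptor D(N);
if (D.isSubprogram())
  errs() << "subprogram " << DISubprogram(N).getName() << "\n";
else if (D.isGlobalVariable())
  errs() << "global " << DIGlobalVariable(N).getDisplayName() << "\n";
else if (D.isType())
  DIType(N).dump();                      // Falls back to the type printers.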
+bool DIDescriptor::isTemplateTypeParameter() const { + return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter; +} + +/// isTemplateValueParameter - Return true if the specified tag is +/// DW_TAG_template_value_parameter. +bool DIDescriptor::isTemplateValueParameter() const { + return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter; +} + +/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit. +bool DIDescriptor::isCompileUnit() const { + return DbgNode && getTag() == dwarf::DW_TAG_compile_unit; +} + +/// isFile - Return true if the specified tag is DW_TAG_file_type. +bool DIDescriptor::isFile() const { + return DbgNode && getTag() == dwarf::DW_TAG_file_type; +} + +/// isNameSpace - Return true if the specified tag is DW_TAG_namespace. +bool DIDescriptor::isNameSpace() const { + return DbgNode && getTag() == dwarf::DW_TAG_namespace; +} + +/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. +bool DIDescriptor::isLexicalBlock() const { + return DbgNode && getTag() == dwarf::DW_TAG_lexical_block; +} + +/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. +bool DIDescriptor::isSubrange() const { + return DbgNode && getTag() == dwarf::DW_TAG_subrange_type; +} + +/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator. +bool DIDescriptor::isEnumerator() const { + return DbgNode && getTag() == dwarf::DW_TAG_enumerator; +} + +//===----------------------------------------------------------------------===// +// Simple Descriptor Constructors and other Methods +//===----------------------------------------------------------------------===// + +DIType::DIType(const MDNode *N) : DIScope(N) { + if (!N) return; + if (!isBasicType() && !isDerivedType() && !isCompositeType()) { + DbgNode = 0; + } +} + +unsigned DIArray::getNumElements() const { + if (!DbgNode) + return 0; + return DbgNode->getNumOperands(); +} + +/// replaceAllUsesWith - Replace all uses of debug info referenced by +/// this descriptor. +void DIType::replaceAllUsesWith(DIDescriptor &D) { + if (!DbgNode) + return; + + // Since we use a TrackingVH for the node, its easy for clients to manufacture + // legitimate situations where they want to replaceAllUsesWith() on something + // which, due to uniquing, has merged with the source. We shield clients from + // this detail by allowing a value to be replaced with replaceAllUsesWith() + // itself. + if (DbgNode != D) { + MDNode *Node = const_cast<MDNode*>(DbgNode); + const MDNode *DN = D; + const Value *V = cast_or_null<Value>(DN); + Node->replaceAllUsesWith(const_cast<Value*>(V)); + MDNode::deleteTemporary(Node); + } +} + +/// replaceAllUsesWith - Replace all uses of debug info referenced by +/// this descriptor. +void DIType::replaceAllUsesWith(MDNode *D) { + if (!DbgNode) + return; + + // Since we use a TrackingVH for the node, its easy for clients to manufacture + // legitimate situations where they want to replaceAllUsesWith() on something + // which, due to uniquing, has merged with the source. We shield clients from + // this detail by allowing a value to be replaced with replaceAllUsesWith() + // itself. + if (DbgNode != D) { + MDNode *Node = const_cast<MDNode*>(DbgNode); + const MDNode *DN = D; + const Value *V = cast_or_null<Value>(DN); + Node->replaceAllUsesWith(const_cast<Value*>(V)); + MDNode::deleteTemporary(Node); + } +} + +/// Verify - Verify that a compile unit is well formed. 
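The Verify() methods implemented below are typically used as a guard before consuming descriptors pulled out of named metadata; a sketch, assuming NMD is the module's "llvm.dbg.sp" NamedMDNode:

// Sketch (assumed names): skip malformed subprogram descriptors.
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
  DISubprogram SP(NMD->getOperand(i));
  if (!SP.Verify())
    continue;                            // Ill-formed or stale debug info.
  errs() << SP.getFilename() << ":" << SP.getLineNumber()
         << " " << SP.getName() << "\n";
}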
+bool DICompileUnit::Verify() const { + if (!DbgNode) + return false; + StringRef N = getFilename(); + if (N.empty()) + return false; + // It is possible that directory and produce string is empty. + return true; +} + +/// Verify - Verify that a type descriptor is well formed. +bool DIType::Verify() const { + if (!DbgNode) + return false; + if (!getContext().Verify()) + return false; + unsigned Tag = getTag(); + if (!isBasicType() && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type && + Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type + && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type + && Tag != dwarf::DW_TAG_enumeration_type + && getFilename().empty()) + return false; + return true; +} + +/// Verify - Verify that a basic type descriptor is well formed. +bool DIBasicType::Verify() const { + return isBasicType(); +} + +/// Verify - Verify that a derived type descriptor is well formed. +bool DIDerivedType::Verify() const { + return isDerivedType(); +} + +/// Verify - Verify that a composite type descriptor is well formed. +bool DICompositeType::Verify() const { + if (!DbgNode) + return false; + if (!getContext().Verify()) + return false; + + DICompileUnit CU = getCompileUnit(); + if (!CU.Verify()) + return false; + return true; +} + +/// Verify - Verify that a subprogram descriptor is well formed. +bool DISubprogram::Verify() const { + if (!DbgNode) + return false; + + if (!getContext().Verify()) + return false; + + DICompileUnit CU = getCompileUnit(); + if (!CU.Verify()) + return false; + + DICompositeType Ty = getType(); + if (!Ty.Verify()) + return false; + return true; +} + +/// Verify - Verify that a global variable descriptor is well formed. +bool DIGlobalVariable::Verify() const { + if (!DbgNode) + return false; + + if (getDisplayName().empty()) + return false; + + if (!getContext().Verify()) + return false; + + DICompileUnit CU = getCompileUnit(); + if (!CU.Verify()) + return false; + + DIType Ty = getType(); + if (!Ty.Verify()) + return false; + + if (!getGlobal() && !getConstant()) + return false; + + return true; +} + +/// Verify - Verify that a variable descriptor is well formed. +bool DIVariable::Verify() const { + if (!DbgNode) + return false; + + if (!getContext().Verify()) + return false; + + if (!getCompileUnit().Verify()) + return false; + + DIType Ty = getType(); + if (!Ty.Verify()) + return false; + + return true; +} + +/// Verify - Verify that a location descriptor is well formed. +bool DILocation::Verify() const { + if (!DbgNode) + return false; + + return DbgNode->getNumOperands() == 4; +} + +/// Verify - Verify that a namespace descriptor is well formed. +bool DINameSpace::Verify() const { + if (!DbgNode) + return false; + if (getName().empty()) + return false; + if (!getCompileUnit().Verify()) + return false; + return true; +} + +/// getOriginalTypeSize - If this type is derived from a base type then +/// return base type size. +uint64_t DIDerivedType::getOriginalTypeSize() const { + unsigned Tag = getTag(); + if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || + Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_restrict_type) { + DIType BaseType = getTypeDerivedFrom(); + // If this type is not derived from any type then take conservative + // approach. 
+ if (!BaseType.isValid()) + return getSizeInBits(); + if (BaseType.isDerivedType()) + return DIDerivedType(BaseType).getOriginalTypeSize(); + else + return BaseType.getSizeInBits(); + } + + return getSizeInBits(); +} + +/// isInlinedFnArgument - Return true if this variable provides debugging +/// information for an inlined function arguments. +bool DIVariable::isInlinedFnArgument(const Function *CurFn) { + assert(CurFn && "Invalid function"); + if (!getContext().isSubprogram()) + return false; + // This variable is not inlined function argument if its scope + // does not describe current function. + return !(DISubprogram(getContext()).describes(CurFn)); +} + +/// describes - Return true if this subprogram provides debugging +/// information for the function F. +bool DISubprogram::describes(const Function *F) { + assert(F && "Invalid function"); + if (F == getFunction()) + return true; + StringRef Name = getLinkageName(); + if (Name.empty()) + Name = getName(); + if (F->getName() == Name) + return true; + return false; +} + +unsigned DISubprogram::isOptimized() const { + assert (DbgNode && "Invalid subprogram descriptor!"); + if (DbgNode->getNumOperands() == 16) + return getUnsignedField(15); + return 0; +} + +StringRef DIScope::getFilename() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getFilename(); + if (isSubprogram()) + return DISubprogram(DbgNode).getFilename(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getFilename(); + if (isNameSpace()) + return DINameSpace(DbgNode).getFilename(); + if (isType()) + return DIType(DbgNode).getFilename(); + if (isFile()) + return DIFile(DbgNode).getFilename(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +StringRef DIScope::getDirectory() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getDirectory(); + if (isSubprogram()) + return DISubprogram(DbgNode).getDirectory(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getDirectory(); + if (isNameSpace()) + return DINameSpace(DbgNode).getDirectory(); + if (isType()) + return DIType(DbgNode).getDirectory(); + if (isFile()) + return DIFile(DbgNode).getDirectory(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +//===----------------------------------------------------------------------===// +// DIDescriptor: dump routines for all descriptors. +//===----------------------------------------------------------------------===// + + +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + OS << "[" << dwarf::TagString(getTag()) << "] "; + OS.write_hex((intptr_t) &*DbgNode) << ']'; +} + +/// print - Print compile unit. +void DICompileUnit::print(raw_ostream &OS) const { + if (getLanguage()) + OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; + + OS << " [" << getDirectory() << "/" << getFilename() << "]"; +} + +/// print - Print type. 
+void DIType::print(raw_ostream &OS) const { + if (!DbgNode) return; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" + << "line " << getLineNumber() << ", " + << getSizeInBits() << " bits, " + << getAlignInBits() << " bit alignment, " + << getOffsetInBits() << " bit offset" + << "] "; + + if (isPrivate()) + OS << " [private] "; + else if (isProtected()) + OS << " [protected] "; + + if (isForwardDecl()) + OS << " [fwd] "; + + if (isBasicType()) + DIBasicType(DbgNode).print(OS); + else if (isDerivedType()) + DIDerivedType(DbgNode).print(OS); + else if (isCompositeType()) + DICompositeType(DbgNode).print(OS); + else { + OS << "Invalid DIType\n"; + return; + } + + OS << "\n"; +} + +/// print - Print basic type. +void DIBasicType::print(raw_ostream &OS) const { + OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; +} + +/// print - Print derived type. +void DIDerivedType::print(raw_ostream &OS) const { + OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); +} + +/// print - Print composite type. +void DICompositeType::print(raw_ostream &OS) const { + DIArray A = getTypeArray(); + OS << " [" << A.getNumElements() << " elements]"; +} + +/// print - Print subprogram. +void DISubprogram::print(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + OS << " [local] "; + + if (isDefinition()) + OS << " [def] "; + + OS << "\n"; +} + +/// print - Print global variable. +void DIGlobalVariable::print(raw_ostream &OS) const { + OS << " ["; + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + OS << " [local] "; + + if (isDefinition()) + OS << " [def] "; + + if (isGlobalVariable()) + DIGlobalVariable(DbgNode).print(OS); + OS << "]\n"; +} + +/// print - Print variable. +void DIVariable::print(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + getType().print(OS); + OS << "\n"; + + // FIXME: Dump complex addresses +} + +/// dump - Print descriptor to dbgs() with a newline. +void DIDescriptor::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print compile unit to dbgs() with a newline. +void DICompileUnit::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print type to dbgs() with a newline. +void DIType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print basic type to dbgs() with a newline. +void DIBasicType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print derived type to dbgs() with a newline. +void DIDerivedType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print composite type to dbgs() with a newline. +void DICompositeType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print subprogram to dbgs() with a newline. 
+void DISubprogram::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print global variable. +void DIGlobalVariable::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print variable. +void DIVariable::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// fixupObjcLikeName - Replace contains special characters used +/// in a typical Objective-C names with '.' in a given string. +static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { + bool isObjCLike = false; + for (size_t i = 0, e = Str.size(); i < e; ++i) { + char C = Str[i]; + if (C == '[') + isObjCLike = true; + + if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' || + C == '+' || C == '(' || C == ')')) + Out.push_back('.'); + else + Out.push_back(C); + } +} + +/// getFnSpecificMDNode - Return a NameMDNode, if available, that is +/// suitable to hold function specific information. +NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) { + SmallString<32> Name = StringRef("llvm.dbg.lv."); + fixupObjcLikeName(FuncName, Name); + + return M.getNamedMetadata(Name.str()); +} + +/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable +/// to hold function specific information. +NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) { + SmallString<32> Name = StringRef("llvm.dbg.lv."); + fixupObjcLikeName(FuncName, Name); + + return M.getOrInsertNamedMetadata(Name.str()); +} + + +//===----------------------------------------------------------------------===// +// DebugInfoFinder implementations. +//===----------------------------------------------------------------------===// + +/// processModule - Process entire module and collect debug info. +void DebugInfoFinder::processModule(Module &M) { + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; + ++BI) { + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) + processDeclare(DDI); + + DebugLoc Loc = BI->getDebugLoc(); + if (Loc.isUnknown()) + continue; + + LLVMContext &Ctx = BI->getContext(); + DIDescriptor Scope(Loc.getScope(Ctx)); + + if (Scope.isCompileUnit()) + addCompileUnit(DICompileUnit(Scope)); + else if (Scope.isSubprogram()) + processSubprogram(DISubprogram(Scope)); + else if (Scope.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(Scope)); + + if (MDNode *IA = Loc.getInlinedAt(Ctx)) + processLocation(DILocation(IA)); + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); + if (addGlobalVariable(DIG)) { + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } + } + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + processSubprogram(DISubprogram(NMD->getOperand(i))); +} + +/// processLocation - Process DILocation. +void DebugInfoFinder::processLocation(DILocation Loc) { + if (!Loc.Verify()) return; + DIDescriptor S(Loc.getScope()); + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S)); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S)); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S)); + processLocation(Loc.getOrigLocation()); +} + +/// processType - Process DIType. 
+void DebugInfoFinder::processType(DIType DT) { + if (!addType(DT)) + return; + + addCompileUnit(DT.getCompileUnit()); + if (DT.isCompositeType()) { + DICompositeType DCT(DT); + processType(DCT.getTypeDerivedFrom()); + DIArray DA = DCT.getTypeArray(); + for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { + DIDescriptor D = DA.getElement(i); + if (D.isType()) + processType(DIType(D)); + else if (D.isSubprogram()) + processSubprogram(DISubprogram(D)); + } + } else if (DT.isDerivedType()) { + DIDerivedType DDT(DT); + processType(DDT.getTypeDerivedFrom()); + } +} + +/// processLexicalBlock +void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { + DIScope Context = LB.getContext(); + if (Context.isLexicalBlock()) + return processLexicalBlock(DILexicalBlock(Context)); + else + return processSubprogram(DISubprogram(Context)); +} + +/// processSubprogram - Process DISubprogram. +void DebugInfoFinder::processSubprogram(DISubprogram SP) { + if (!addSubprogram(SP)) + return; + addCompileUnit(SP.getCompileUnit()); + processType(SP.getType()); +} + +/// processDeclare - Process DbgDeclareInst. +void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { + MDNode *N = dyn_cast<MDNode>(DDI->getVariable()); + if (!N) return; + + DIDescriptor DV(N); + if (!DV.isVariable()) + return; + + if (!NodesSeen.insert(DV)) + return; + + addCompileUnit(DIVariable(N).getCompileUnit()); + processType(DIVariable(N).getType()); +} + +/// addType - Add type into Tys. +bool DebugInfoFinder::addType(DIType DT) { + if (!DT.isValid()) + return false; + + if (!NodesSeen.insert(DT)) + return false; + + TYs.push_back(DT); + return true; +} + +/// addCompileUnit - Add compile unit into CUs. +bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) { + if (!CU.Verify()) + return false; + + if (!NodesSeen.insert(CU)) + return false; + + CUs.push_back(CU); + return true; +} + +/// addGlobalVariable - Add global variable into GVs. +bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) { + if (!DIDescriptor(DIG).isGlobalVariable()) + return false; + + if (!NodesSeen.insert(DIG)) + return false; + + GVs.push_back(DIG); + return true; +} + +// addSubprogram - Add subprgoram into SPs. +bool DebugInfoFinder::addSubprogram(DISubprogram SP) { + if (!DIDescriptor(SP).isSubprogram()) + return false; + + if (!NodesSeen.insert(SP)) + return false; + + SPs.push_back(SP); + return true; +} + +/// getDISubprogram - Find subprogram that is enclosing this scope. +DISubprogram llvm::getDISubprogram(const MDNode *Scope) { + DIDescriptor D(Scope); + if (D.isSubprogram()) + return DISubprogram(Scope); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(Scope).getContext()); + + return DISubprogram(); +} + +/// getDICompositeType - Find underlying composite type. +DICompositeType llvm::getDICompositeType(DIType T) { + if (T.isCompositeType()) + return DICompositeType(T); + + if (T.isDerivedType()) + return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); + + return DICompositeType(); +} diff --git a/contrib/llvm/lib/Analysis/DomPrinter.cpp b/contrib/llvm/lib/Analysis/DomPrinter.cpp new file mode 100644 index 0000000..cde4314 --- /dev/null +++ b/contrib/llvm/lib/Analysis/DomPrinter.cpp @@ -0,0 +1,232 @@ +//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
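With the DebugInfo.cpp pieces above in place, a typical consumer runs DebugInfoFinder over a whole module and then walks the collected descriptors. The sketch below is illustrative only (not part of this patch) and assumes the subprogram_begin()/subprogram_end() iterators DebugInfoFinder exposes in DebugInfo.h:

#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Collect every debug descriptor reachable from M and list the subprograms.
static void listDebugSubprograms(Module &M) {
  DebugInfoFinder Finder;
  Finder.processModule(M);
  for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
       E = Finder.subprogram_end(); I != E; ++I) {
    DISubprogram SP(*I);
    if (SP.Verify())
      errs() << "subprogram: " << SP.getName() << "\n";
  }
}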
+// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit +// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the +// program, with a graph of the dominance/postdominance tree of that +// function. +// +// There are also passes available to directly call dotty ('-view-dom' or +// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the +// names of the bbs are printed, but the content is hidden. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DomPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/PostDominators.h" + +using namespace llvm; + +namespace llvm { +template<> +struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) { + + BasicBlock *BB = Node->getBlock(); + + if (!BB) + return "Post dominance root node"; + + + if (isSimple()) + return DOTGraphTraits<const Function*> + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits<const Function*> + ::getCompleteNodeLabel(BB, BB->getParent()); + } +}; + +template<> +struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<DomTreeNode*>(isSimple) {} + + static std::string getGraphName(DominatorTree *DT) { + return "Dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode()); + } +}; + +template<> +struct DOTGraphTraits<PostDominatorTree*> + : public DOTGraphTraits<DomTreeNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<DomTreeNode*>(isSimple) {} + + static std::string getGraphName(PostDominatorTree *DT) { + return "Post dominator tree"; + } + + std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) { + return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode()); + } +}; +} + +namespace { +struct DomViewer + : public DOTGraphTraitsViewer<DominatorTree, false> { + static char ID; + DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){ + initializeDomViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct DomOnlyViewer + : public DOTGraphTraitsViewer<DominatorTree, true> { + static char ID; + DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){ + initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct PostDomViewer + : public DOTGraphTraitsViewer<PostDominatorTree, false> { + static char ID; + PostDomViewer() : + DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){ + initializePostDomViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct PostDomOnlyViewer + : public DOTGraphTraitsViewer<PostDominatorTree, true> { + static char ID; + PostDomOnlyViewer() : + DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){ + initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry()); + } +}; +} // end anonymous namespace + +char DomViewer::ID = 0; +INITIALIZE_PASS(DomViewer, "view-dom", + "View dominance tree of function", false, false) + +char DomOnlyViewer::ID = 0; +INITIALIZE_PASS(DomOnlyViewer, "view-dom-only", + "View dominance tree of function (with no function bodies)", + false, false) + +char 
PostDomViewer::ID = 0; +INITIALIZE_PASS(PostDomViewer, "view-postdom", + "View postdominance tree of function", false, false) + +char PostDomOnlyViewer::ID = 0; +INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only", + "View postdominance tree of function " + "(with no function bodies)", + false, false) + +namespace { +struct DomPrinter + : public DOTGraphTraitsPrinter<DominatorTree, false> { + static char ID; + DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) { + initializeDomPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +struct DomOnlyPrinter + : public DOTGraphTraitsPrinter<DominatorTree, true> { + static char ID; + DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) { + initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +struct PostDomPrinter + : public DOTGraphTraitsPrinter<PostDominatorTree, false> { + static char ID; + PostDomPrinter() : + DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) { + initializePostDomPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +struct PostDomOnlyPrinter + : public DOTGraphTraitsPrinter<PostDominatorTree, true> { + static char ID; + PostDomOnlyPrinter() : + DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) { + initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +} // end anonymous namespace + + + +char DomPrinter::ID = 0; +INITIALIZE_PASS(DomPrinter, "dot-dom", + "Print dominance tree of function to 'dot' file", + false, false) + +char DomOnlyPrinter::ID = 0; +INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only", + "Print dominance tree of function to 'dot' file " + "(with no function bodies)", + false, false) + +char PostDomPrinter::ID = 0; +INITIALIZE_PASS(PostDomPrinter, "dot-postdom", + "Print postdominance tree of function to 'dot' file", + false, false) + +char PostDomOnlyPrinter::ID = 0; +INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only", + "Print postdominance tree of function to 'dot' file " + "(with no function bodies)", + false, false) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +FunctionPass *llvm::createDomPrinterPass() { + return new DomPrinter(); +} + +FunctionPass *llvm::createDomOnlyPrinterPass() { + return new DomOnlyPrinter(); +} + +FunctionPass *llvm::createDomViewerPass() { + return new DomViewer(); +} + +FunctionPass *llvm::createDomOnlyViewerPass() { + return new DomOnlyViewer(); +} + +FunctionPass *llvm::createPostDomPrinterPass() { + return new PostDomPrinter(); +} + +FunctionPass *llvm::createPostDomOnlyPrinterPass() { + return new PostDomOnlyPrinter(); +} + +FunctionPass *llvm::createPostDomViewerPass() { + return new PostDomViewer(); +} + +FunctionPass *llvm::createPostDomOnlyViewerPass() { + return new PostDomOnlyViewer(); +} diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp new file mode 100644 index 0000000..6de4e1e --- /dev/null +++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp @@ -0,0 +1,137 @@ +//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
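The viewer and printer passes above are normally reached through opt (for example '-dot-dom', which writes a dom.<fnname>.dot file per function as the file header describes), but they can also be scheduled directly from C++ through the factory functions defined above. A small sketch under those assumptions, not part of this patch:

#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
using namespace llvm;

// Emit dom.<function>.dot files for every function in the module.
static void emitDomTreeDotFiles(Module &M) {
  PassManager PM;
  PM.add(createDomPrinterPass());
  PM.run(M);
}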
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+
+namespace {
+  class DFCalculateWorkObject {
+  public:
+    DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+                          const DomTreeNode *N,
+                          const DomTreeNode *PN)
+      : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+    BasicBlock *currentBB;
+    BasicBlock *parentBB;
+    const DomTreeNode *Node;
+    const DomTreeNode *parentNode;
+  };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+                             const DomTreeNode *Node) {
+  BasicBlock *BB = Node->getBlock();
+  DomSetType *Result = NULL;
+
+  std::vector<DFCalculateWorkObject> workList;
+  SmallPtrSet<BasicBlock *, 32> visited;
+
+  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+  do {
+    DFCalculateWorkObject *currentW = &workList.back();
+    assert (currentW && "Missing work object.");
+
+    BasicBlock *currentBB = currentW->currentBB;
+    BasicBlock *parentBB = currentW->parentBB;
+    const DomTreeNode *currentNode = currentW->Node;
+    const DomTreeNode *parentNode = currentW->parentNode;
+    assert (currentBB && "Invalid work object. Missing current Basic Block");
+    assert (currentNode && "Invalid work object. Missing current Node");
+    DomSetType &S = Frontiers[currentBB];
+
+    // Visit each block only once.
+    if (visited.count(currentBB) == 0) {
+      visited.insert(currentBB);
+
+      // Loop over CFG successors to calculate DFlocal[currentNode]
+      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+           SI != SE; ++SI) {
+        // Does Node immediately dominate this successor?
+        if (DT[*SI]->getIDom() != currentNode)
+          S.insert(*SI);
+      }
+    }
+
+    // At this point, S is DFlocal.  Now we union in DFup's of our children...
+    // Loop through and visit the nodes that Node immediately dominates (Node's
+    // children in the IDomTree)
+    bool visitChild = false;
+    for (DomTreeNode::const_iterator NI = currentNode->begin(),
+         NE = currentNode->end(); NI != NE; ++NI) {
+      DomTreeNode *IDominee = *NI;
+      BasicBlock *childBB = IDominee->getBlock();
+      if (visited.count(childBB) == 0) {
+        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+                                                 IDominee, currentNode));
+        visitChild = true;
+      }
+    }
+
+    // If no unvisited children were pushed (every child has already been
+    // visited, or this node has none), this block is finished: merge its
+    // frontier into the parent's and pop it from the workList.
+ if (!visitChild) { + + if (!parentBB) { + Result = &S; + break; + } + + DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end(); + DomSetType &parentSet = Frontiers[parentBB]; + for (; CDFI != CDFE; ++CDFI) { + if (!DT.properlyDominates(parentNode, DT[*CDFI])) + parentSet.insert(*CDFI); + } + workList.pop_back(); + } + + } while (!workList.empty()); + + return *Result; +} + +void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " DomFrontier for BB "; + if (I->first) + WriteAsOperand(OS, I->first, false); + else + OS << " <<exit node>>"; + OS << " is:\t"; + + const std::set<BasicBlock*> &BBs = I->second; + + for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end(); + I != E; ++I) { + OS << ' '; + if (*I) + WriteAsOperand(OS, *I, false); + else + OS << "<<exit node>>"; + } + OS << "\n"; + } +} + +void DominanceFrontierBase::dump() const { + print(dbgs()); +} + diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp new file mode 100644 index 0000000..2e79eab --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -0,0 +1,340 @@ +//===- CallGraph.cpp - Build a Module's call graph ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CallGraph class and provides the BasicCallGraph +// default implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +//===----------------------------------------------------------------------===// +// BasicCallGraph class definition +// +class BasicCallGraph : public ModulePass, public CallGraph { + // Root is root of the call graph, or the external node if a 'main' function + // couldn't be found. + // + CallGraphNode *Root; + + // ExternalCallingNode - This node has edges to all external functions and + // those internal functions that have their address taken. + CallGraphNode *ExternalCallingNode; + + // CallsExternalNode - This node has edges to it from all functions making + // indirect calls or calling an external function. + CallGraphNode *CallsExternalNode; + +public: + static char ID; // Class identification, replacement for typeinfo + BasicCallGraph() : ModulePass(ID), Root(0), + ExternalCallingNode(0), CallsExternalNode(0) { + initializeBasicCallGraphPass(*PassRegistry::getPassRegistry()); + } + + // runOnModule - Compute the call graph for the specified module. + virtual bool runOnModule(Module &M) { + CallGraph::initialize(M); + + ExternalCallingNode = getOrInsertFunction(0); + CallsExternalNode = new CallGraphNode(0); + Root = 0; + + // Add every function to the call graph. 
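Once the pass above has populated Frontiers, clients read the result through the analysis interface rather than calling calculate() themselves. The sketch below is illustrative only and not part of this patch; FrontierSizes is a made-up pass name, and it relies on the find()/end() accessors DominanceFrontierBase provides in DominanceFrontier.h:

#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
// Report how many blocks sit in each basic block's dominance frontier.
struct FrontierSizes : public FunctionPass {
  static char ID;
  FrontierSizes() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<DominanceFrontier>();
    AU.setPreservesAll();
  }

  virtual bool runOnFunction(Function &F) {
    DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
      DominanceFrontier::iterator DFI = DF.find(&*BB);
      if (DFI != DF.end())
        errs() << BB->getName() << " has a frontier of "
               << DFI->second.size() << " block(s)\n";
    }
    return false;
  }
};
}
char FrontierSizes::ID = 0;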
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); + + // If we didn't find a main function, use the external call graph node + if (Root == 0) Root = ExternalCallingNode; + + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual void print(raw_ostream &OS, const Module *) const { + OS << "CallGraph Root is: "; + if (Function *F = getRoot()->getFunction()) + OS << F->getName() << "\n"; + else { + OS << "<<null function: 0x" << getRoot() << ">>\n"; + } + + CallGraph::print(OS, 0); + } + + virtual void releaseMemory() { + destroy(); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it should + /// override this to adjust the this pointer as needed for the specified pass + /// info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &CallGraph::ID) + return (CallGraph*)this; + return this; + } + + CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } + CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } + + // getRoot - Return the root of the call graph, which is either main, or if + // main cannot be found, the external node. + // + CallGraphNode *getRoot() { return Root; } + const CallGraphNode *getRoot() const { return Root; } + +private: + //===--------------------------------------------------------------------- + // Implementation of CallGraph construction + // + + // addToCallGraph - Add a function to the call graph, and link the node to all + // of the functions that it calls. + // + void addToCallGraph(Function *F) { + CallGraphNode *Node = getOrInsertFunction(F); + + // If this function has external linkage, anything could call it. + if (!F->hasLocalLinkage()) { + ExternalCallingNode->addCalledFunction(CallSite(), Node); + + // Found the entry point? + if (F->getName() == "main") { + if (Root) // Found multiple external mains? Don't pick one. + Root = ExternalCallingNode; + else + Root = Node; // Found a main, keep track of it! + } + } + + // Loop over all of the users of the function, looking for non-call uses. + for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ + User *U = *I; + if ((!isa<CallInst>(U) && !isa<InvokeInst>(U)) + || !CallSite(cast<Instruction>(U)).isCallee(I)) { + // Not a call, or being used as a parameter rather than as the callee. + ExternalCallingNode->addCalledFunction(CallSite(), Node); + break; + } + } + + // If this function is not defined in this translation unit, it could call + // anything. + if (F->isDeclaration() && !F->isIntrinsic()) + Node->addCalledFunction(CallSite(), CallsExternalNode); + + // Look for calls by this function. + for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + CallSite CS(cast<Value>(II)); + if (CS && !isa<IntrinsicInst>(II)) { + const Function *Callee = CS.getCalledFunction(); + if (Callee) + Node->addCalledFunction(CS, getOrInsertFunction(Callee)); + else + Node->addCalledFunction(CS, CallsExternalNode); + } + } + } + + // + // destroy - Release memory for the call graph + virtual void destroy() { + /// CallsExternalNode is not in the function map, delete it explicitly. 
+ if (CallsExternalNode) { + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + CallsExternalNode = 0; + } + CallGraph::destroy(); + } +}; + +} //End anonymous namespace + +INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph) +INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg", + "Basic CallGraph Construction", false, true, true) + +char CallGraph::ID = 0; +char BasicCallGraph::ID = 0; + +void CallGraph::initialize(Module &M) { + Mod = &M; +} + +void CallGraph::destroy() { + if (FunctionMap.empty()) return; + + // Reset all node's use counts to zero before deleting them to prevent an + // assertion from firing. +#ifndef NDEBUG + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + I->second->allReferencesDropped(); +#endif + + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + delete I->second; + FunctionMap.clear(); +} + +void CallGraph::print(raw_ostream &OS, Module*) const { + for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) + I->second->print(OS); +} +void CallGraph::dump() const { + print(dbgs(), 0); +} + +//===----------------------------------------------------------------------===// +// Implementations of public modification methods +// + +// removeFunctionFromModule - Unlink the function from this module, returning +// it. Because this removes the function from the module, the call graph node +// is destroyed. This is only valid if the function does not call any other +// functions (ie, there are no edges in it's CGN). The easiest way to do this +// is to dropAllReferences before calling this. +// +Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { + assert(CGN->empty() && "Cannot remove function from call " + "graph if it references other functions!"); + Function *F = CGN->getFunction(); // Get the function for the call graph node + delete CGN; // Delete the call graph node for this func + FunctionMap.erase(F); // Remove the call graph node from the map + + Mod->getFunctionList().remove(F); + return F; +} + +/// spliceFunction - Replace the function represented by this node by another. +/// This does not rescan the body of the function, so it is suitable when +/// splicing the body of the old function to the new while also updating all +/// callers from old to new. +/// +void CallGraph::spliceFunction(const Function *From, const Function *To) { + assert(FunctionMap.count(From) && "No CallGraphNode for function!"); + assert(!FunctionMap.count(To) && + "Pointing CallGraphNode at a function that already exists"); + FunctionMapTy::iterator I = FunctionMap.find(From); + I->second->F = const_cast<Function*>(To); + FunctionMap[To] = I->second; + FunctionMap.erase(I); +} + +// getOrInsertFunction - This method is identical to calling operator[], but +// it will insert a new CallGraphNode for the specified function if one does +// not already exist. 
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { + CallGraphNode *&CGN = FunctionMap[F]; + if (CGN) return CGN; + + assert((!F || F->getParent() == Mod) && "Function not in current module!"); + return CGN = new CallGraphNode(const_cast<Function*>(F)); +} + +void CallGraphNode::print(raw_ostream &OS) const { + if (Function *F = getFunction()) + OS << "Call graph node for function: '" << F->getName() << "'"; + else + OS << "Call graph node <<null function>>"; + + OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n'; + + for (const_iterator I = begin(), E = end(); I != E; ++I) { + OS << " CS<" << I->first << "> calls "; + if (Function *FI = I->second->getFunction()) + OS << "function '" << FI->getName() <<"'\n"; + else + OS << "external node\n"; + } + OS << '\n'; +} + +void CallGraphNode::dump() const { print(dbgs()); } + +/// removeCallEdgeFor - This method removes the edge in the node for the +/// specified call site. Note that this method takes linear time, so it +/// should be used sparingly. +void CallGraphNode::removeCallEdgeFor(CallSite CS) { + for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { + assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + return; + } + } +} + +// removeAnyCallEdgeTo - This method removes any call edges from this node to +// the specified callee function. This takes more time to execute than +// removeCallEdgeTo, so it should not be used unless necessary. +void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) { + for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i) + if (CalledFunctions[i].second == Callee) { + Callee->DropRef(); + CalledFunctions[i] = CalledFunctions.back(); + CalledFunctions.pop_back(); + --i; --e; + } +} + +/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite +/// from this node to the specified callee function. +void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { + for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { + assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); + CallRecord &CR = *I; + if (CR.second == Callee && CR.first == 0) { + Callee->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + return; + } + } +} + +/// replaceCallEdge - This method replaces the edge in the node for the +/// specified call site with a new one. Note that this method takes linear +/// time, so it should be used sparingly. 
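Beyond these modification hooks, the most common way to consume a CallGraph is to walk its strongly connected components bottom-up with scc_iterator, the same traversal the CGSCC pass manager below is built on. A short sketch, illustrative only and not part of this patch:

#include "llvm/Analysis/CallGraph.h"
#include "llvm/Function.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;

// Visit the call graph one SCC at a time, callees before callers.
static void dumpSCCs(CallGraph &CG) {
  for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG);
       I != E; ++I) {
    std::vector<CallGraphNode*> &SCC = *I;
    errs() << "SCC with " << SCC.size() << " node(s):";
    for (unsigned i = 0, e = SCC.size(); i != e; ++i)
      if (Function *F = SCC[i]->getFunction())   // null for the external node
        errs() << ' ' << F->getName();
    errs() << '\n';
  }
}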
+void CallGraphNode::replaceCallEdge(CallSite CS,
+                                    CallSite NewCS, CallGraphNode *NewNode){
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (I->first == CS.getInstruction()) {
+      I->second->DropRef();
+      I->first = NewCS.getInstruction();
+      I->second = NewNode;
+      NewNode->AddRef();
+      return;
+    }
+  }
+}
+
+// Ensure that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 0000000..659ffab
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,608 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph.  Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process functions bottom-up, except
+// for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+  static char ID;
+  explicit CGPassManager(int Depth)
+    : ModulePass(ID), PMDataManager(Depth) { }
+
+  /// run - Execute all of the passes scheduled for execution.  Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool runOnModule(Module &M);
+
+  bool doInitialization(CallGraph &CG);
+  bool doFinalization(CallGraph &CG);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  void getAnalysisUsage(AnalysisUsage &Info) const {
+    // CGPassManager walks the SCCs of the call graph, so it needs CallGraph.
+ Info.addRequired<CallGraph>(); + Info.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "CallGraph Pass Manager"; + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + // Print passes managed by this manager + void dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } + } + + Pass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast<Pass *>(PassVector[N]); + } + + virtual PassManagerType getPassManagerType() const { + return PMT_CallGraphPassManager; + } + +private: + bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall); + + bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall); + bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG, + bool IsCheckingMode); +}; + +} // end anonymous namespace. + +char CGPassManager::ID = 0; + + +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { + bool Changed = false; + PMDataManager *PM = P->getAsPMDataManager(); + + if (PM == 0) { + CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; + if (!CallGraphUpToDate) { + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + { + TimeRegion PassTimer(getPassTimer(CGSP)); + Changed = CGSP->runOnSCC(CurSCC); + } + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. +#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + FPPassManager *FPP = (FPPassManager*)P; + + // Run pass P on all functions in the current SCC. + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. 
+/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap<Value*, CallGraphNode*> CallSites; + + DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + ); + + bool MadeChange = false; + bool DevirtualizedCall = false; + + // Scan all functions in the SCC. + unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. Sync up the call sites in + // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. + CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + !CallSite(I->first)) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS(cast<Value>(I)); + if (!CS || isa<IntrinsicInst>(I)) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. + DenseMap<Value*, CallGraphNode*>::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. 
If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { + CalleeNode = CG.getCallsExternalNode(); + } + + // Update the edge target in CGN. + CGN->replaceCallEdge(CS, CS, CalleeNode); + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + ++NumDirectAdded; + } else { + CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((FunctionNo & 15) == 15) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + + } else { + dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. 
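The refresh logic above hinges on telling direct call sites (a statically known callee) apart from indirect ones (no called Function, modeled by the calls-external node). For reference, here is a standalone sketch of that classification using the same CallSite test as the scan above; it is an illustration, not part of this patch:

#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

// Count direct and indirect call sites in F, skipping intrinsics, the same way
// RefreshCallGraph scans a function body.
static void countCallSites(Function &F, unsigned &Direct, unsigned &Indirect) {
  Direct = Indirect = 0;
  for (Function::iterator BB = F.begin(), FE = F.end(); BB != FE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      CallSite CS(cast<Value>(I));
      if (!CS || isa<IntrinsicInst>(I))
        continue;                     // not a call, or an intrinsic
      if (CS.getCalledFunction())
        ++Direct;                     // callee statically known
      else
        ++Indirect;                   // devirtualization candidate
    }
}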
+bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall) { + bool Changed = false; + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. + if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; + #ifndef NDEBUG + raw_string_ostream OS(Functions); + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (I != CurSCC.begin()) OS << ", "; + (*I)->print(OS); + } + OS.flush(); + #endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, + CallGraphUpToDate, DevirtualizedCall); + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); + dumpPreservedSet(P); + + verifyPreservedAnalysis(P); + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, "", ON_CG_MSG); + } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + return Changed; +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the module, and if so, return true. +bool CGPassManager::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + bool Changed = doInitialization(CG); + + // Walk the callgraph in bottom-up SCC order. + scc_iterator<CallGraph*> CGI = scc_begin(&CG); + + CallGraphSCC CurSCC(&CGI); + while (!CGI.isAtEnd()) { + // Copy the current SCC and increment past it so that the pass can hack + // on the SCC if it wants to without invalidating our iterator. + std::vector<CallGraphNode*> &NodeVec = *CGI; + CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); + ++CGI; + + // At the top level, we run all the passes in this pass manager on the + // functions in this SCC. However, we support iterative compilation in the + // case where a function pass devirtualizes a call to a function. For + // example, it is very common for a function pass (often GVN or instcombine) + // to eliminate the addressing that feeds into a call. With that improved + // information, we would like the call to be an inline candidate, infer + // mod-ref information etc. + // + // Because of this, we allow iteration up to a specified iteration count. + // This only happens in the case of a devirtualized call, so we only burn + // compile time in the case that we're making progress. We also have a hard + // iteration count limit in case there is crazy code. 
+ unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); + + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); + + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; + + } + Changed |= doFinalization(CG); + return Changed; +} + + +/// Initialize CG +bool CGPassManager::doInitialization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG); + } + } + return Changed; +} + +/// Finalize CG +bool CGPassManager::doFinalization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG); + } + } + return Changed; +} + +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator<CallGraph*> *CGI = (scc_iterator<CallGraph*>*)Context; + CGI->ReplaceNode(Old, New); +} + + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + +/// Assign pass manager to manage this pass. +void CallGraphSCCPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find CGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) + PMS.pop(); + + assert(!PMS.empty() && "Unable to handle Call Graph Pass"); + CGPassManager *CGP; + + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) + CGP = (CGPassManager*)PMS.top(); + else { + // Create new Call Graph SCC Pass Manager if it does not exist. 
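For contrast with the manager machinery here, a client-side CallGraphSCCPass is small: subclass CallGraphSCCPass, implement runOnSCC, and the CGPassManager above hands it one SCC at a time (the PrintCallGraphPass below follows the same shape). SCCSizeReporter is a made-up name for this illustrative sketch, which is not part of this patch:

#include "llvm/CallGraphSCCPass.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
// Report the size of every SCC handed to us by the CGSCC pass manager.
struct SCCSizeReporter : public CallGraphSCCPass {
  static char ID;
  SCCSizeReporter() : CallGraphSCCPass(ID) {}

  virtual bool runOnSCC(CallGraphSCC &SCC) {
    unsigned Size = 0;
    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
      ++Size;
    errs() << "SCC of size " << Size << '\n';
    return false;                  // the call graph was not modified
  }
};
}
char SCCSizeReporter::ID = 0;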
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + CGP = new CGPassManager(PMD->getDepth() + 1); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(CGP); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = CGP; + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(CGP); + } + + CGP->add(this); +} + +/// getAnalysisUsage - For this class, we declare that we require and preserve +/// the call graph. If the derived class implements this method, it should +/// always explicitly call the implementation here. +void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<CallGraph>(); + AU.addPreserved<CallGraph>(); +} + + +//===----------------------------------------------------------------------===// +// PrintCallGraphPass Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. + /// + class PrintCallGraphPass : public CallGraphSCCPass { + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + + public: + static char ID; + PrintCallGraphPass(const std::string &B, raw_ostream &o) + : CallGraphSCCPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnSCC(CallGraphSCC &SCC) { + Out << Banner; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + (*I)->getFunction()->print(Out); + return false; + } + }; + +} // end anonymous namespace. + +char PrintCallGraphPass::ID = 0; + +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintCallGraphPass(Banner, O); +} + diff --git a/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp new file mode 100644 index 0000000..6535786 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/FindUsedTypes.cpp @@ -0,0 +1,101 @@ +//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to seek out all of the types in use by the program. Note +// that this analysis explicitly does not include types only used by the symbol +// table. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +char FindUsedTypes::ID = 0; +INITIALIZE_PASS(FindUsedTypes, "print-used-types", + "Find Used Types", false, true) + +// IncorporateType - Incorporate one type and all of its subtypes into the +// collection of used types. +// +void FindUsedTypes::IncorporateType(const Type *Ty) { + // If ty doesn't already exist in the used types map, add it now, otherwise + // return. + if (!UsedTypes.insert(Ty)) return; // Already contain Ty. 
+ + // Make sure to add any types this type references now. + // + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) + IncorporateType(*I); +} + +void FindUsedTypes::IncorporateValue(const Value *V) { + IncorporateType(V->getType()); + + // If this is a constant, it could be using other types... + if (const Constant *C = dyn_cast<Constant>(V)) { + if (!isa<GlobalValue>(C)) + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) + IncorporateValue(*OI); + } +} + + +// run - This incorporates all types used by the specified module +// +bool FindUsedTypes::runOnModule(Module &m) { + UsedTypes.clear(); // reset if run multiple times... + + // Loop over global variables, incorporating their types + for (Module::const_global_iterator I = m.global_begin(), E = m.global_end(); + I != E; ++I) { + IncorporateType(I->getType()); + if (I->hasInitializer()) + IncorporateValue(I->getInitializer()); + } + + for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) { + IncorporateType(MI->getType()); + const Function &F = *MI; + + // Loop over all of the instructions in the function, adding their return + // type as well as the types of their operands. + // + for (const_inst_iterator II = inst_begin(F), IE = inst_end(F); + II != IE; ++II) { + const Instruction &I = *II; + + IncorporateType(I.getType()); // Incorporate the type of the instruction + for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); + OI != OE; ++OI) + IncorporateValue(*OI); // Insert inst operand types as well + } + } + + return false; +} + +// Print the types found in the module. If the optional Module parameter is +// passed in, then the types are printed symbolically if possible, using the +// symbol table from the module. +// +void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { + OS << "Types in use by this module:\n"; + for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(), + E = UsedTypes.end(); I != E; ++I) { + OS << " " << **I << '\n'; + } +} diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp new file mode 100644 index 0000000..b226d66 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -0,0 +1,609 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. 
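A consumer of this analysis reads the collected set back out rather than re-walking the module. The sketch below is illustrative only and not part of this patch; UsedTypeCounter is a made-up pass name, and getTypes() is assumed to be the accessor FindUsedTypes.h exposes for the UsedTypes set:

#include "llvm/Analysis/FindUsedTypes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
// Report how many distinct types the module uses, as found by FindUsedTypes.
struct UsedTypeCounter : public ModulePass {
  static char ID;
  UsedTypeCounter() : ModulePass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<FindUsedTypes>();
    AU.setPreservesAll();
  }

  virtual bool runOnModule(Module &M) {
    const SetVector<const Type *> &Types =
      getAnalysis<FindUsedTypes>().getTypes();
    errs() << "Module uses " << Types.size() << " distinct type(s)\n";
    return false;
  }
};
}
char UsedTypeCounter::ID = 0;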
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "globalsmodref-aa" +#include "llvm/Analysis/Passes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SCCIterator.h" +#include <set> +using namespace llvm; + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +namespace { + /// FunctionRecord - One instance of this structure is stored for every + /// function in the program. Later, the entries for these functions are + /// removed if the function is found to call an external function (in which + /// case we know nothing about it. + struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. + std::map<const GlobalValue*, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue*, unsigned>::const_iterator I = + GlobalInfo.find(GV); + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } + + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; + + FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} + }; + + /// GlobalsModRef - The actual analysis pass. + class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue*> NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue*> IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. 
+ std::map<const Function*, FunctionRecord> FunctionInfo; + + public: + static char ID; + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) { + InitializeAliasAnalysis(this); // set up super class + AnalyzeGlobals(M); // find non-addr taken globals + AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG + return false; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraph>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const Location &LocA, const Location &LocB); + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(const Function *F) { + ModRefBehavior Min = UnknownModRefBehavior; + + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; + } + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + ModRefBehavior Min = UnknownModRefBehavior; + + if (const Function* F = CS.getCalledFunction()) + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; + } + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + } + + virtual void deleteValue(Value *V); + virtual void copyValue(Value *From, Value *To); + virtual void addEscapingUse(Use &U); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. 
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function*, FunctionRecord>::iterator I = + FunctionInfo.find(F); + if (I != FunctionInfo.end()) + return &I->second; + return 0; + } + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest = 0); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); + }; +} + +char GlobalsModRef::ID = 0; +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) + +Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program. If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. +void GlobalsModRef::AnalyzeGlobals(Module &M) { + std::vector<Function*> Readers, Writers; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global. + NonAddressTakenGlobals.insert(I); + ++NumNonAddrTakenFunctions; + } + Readers.clear(); Writers.clear(); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(I); + + for (unsigned i = 0, e = Readers.size(); i != e; ++i) + FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; + + if (!I->isConstant()) // No need to keep track of writers to constants + for (unsigned i = 0, e = Writers.size(); i != e; ++i) + FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (I->getType()->getElementType()->isPointerTy() && + AnalyzeIndirectGlobalMemory(I)) + ++NumIndirectGlobalVars; + } + Readers.clear(); Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. 
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, + std::vector<Function*> &Readers, + std::vector<Function*> &Writers, + GlobalValue *OkayStoreDest) { + if (!V->getType()->isPointerTy()) return true; + + for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + Readers.push_back(LI->getParent()->getParent()); + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (V == SI->getOperand(1)) { + Writers.push_back(SI->getParent()->getParent()); + } else if (SI->getOperand(1) != OkayStoreDest) { + return true; // Storing the pointer + } + } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + return true; + } else if (isFreeCall(U)) { + Writers.push_back(cast<Instruction>(U)->getParent()->getParent()); + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + if (CI->getArgOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + // Make sure that this is just the function being called, not that it is + // passing into the function. + for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) + if (II->getArgOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { + if (CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::BitCast) { + if (AnalyzeUsesOfPointer(CE, Readers, Writers)) + return true; + } else { + return true; + } + } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { + if (!isa<ConstantPointerNull>(ICI->getOperand(1))) + return true; // Allow comparison against null. + } else { + return true; + } + } + + return false; +} + +/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable +/// which holds a pointer type. See if the global always points to non-aliased +/// heap memory: that is, all initializers of the globals are allocations, and +/// those allocations have no use other than initialization of the global. +/// Further, all loads out of GV must directly use the memory, not store the +/// pointer somewhere. If this is true, we consider the memory pointed to by +/// GV to be owned by GV and can disambiguate other pointers from it. +bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { + // Keep track of values related to the allocation of the memory, f.e. the + // value produced by the malloc call and any casts. + std::vector<Value*> AllocRelatedValues; + + // Walk the user list of the global. If we find anything other than a direct + // load or store, bail out. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // The pointer loaded from the global can only be used in simple ways: + // we allow addressing of it and loading storing to it. We do *not* allow + // storing the loaded pointer somewhere else or passing to a function. + std::vector<Function*> ReadersWriters; + if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) + return false; // Loaded pointer escapes. + // TODO: Could try some IP mod/ref of the loaded pointer. 
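
//===-- Editor's note (illustrative input, not part of this patch) ---------===//
// The "indirect global" shape this function looks for, sketched at the source
// level: every store to the global stores freshly allocated memory, and the
// loaded pointer is only ever used for addressing, never stored elsewhere or
// passed away.  The memory behind Table can then be treated as owned by Table
// and disambiguated from other pointers.  Names are the editor's, not LLVM's.
#include <cstdlib>

static int *Table;                       // internal linkage, address not taken

void initTable(unsigned N) {
  Table = static_cast<int *>(std::malloc(N * sizeof(int)));  // only allocation
}                                                            // results stored

void setEntry(unsigned I, int V) {
  Table[I] = V;                          // load Table, then plain addressing
}
//===----------------------------------------------------------------------===//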
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + // Storing the global itself. + if (SI->getOperand(0) == GV) return false; + + // If storing the null pointer, ignore it. + if (isa<ConstantPointerNull>(SI->getOperand(0))) + continue; + + // Check the value being stored. + Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); + + if (isMalloc(Ptr)) { + // Okay, easy case. + } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) { + Function *F = CI->getCalledFunction(); + if (!F || !F->isDeclaration()) return false; // Too hard to analyze. + if (F->getName() != "calloc") return false; // Not calloc. + } else { + return false; // Too hard to analyze. + } + + // Analyze all uses of the allocation. If any of them are used in a + // non-simple way (e.g. stored to another global) bail out. + std::vector<Function*> ReadersWriters; + if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) + return false; // Loaded pointer escapes. + + // Remember that this allocation is related to the indirect global. + AllocRelatedValues.push_back(Ptr); + } else { + // Something complex, bail out. + return false; + } + } + + // Okay, this is an indirect global. Remember all of the allocations for + // this global in AllocsForIndirectGlobals. + while (!AllocRelatedValues.empty()) { + AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; + AllocRelatedValues.pop_back(); + } + IndirectGlobals.insert(GV); + return true; +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E; + ++I) { + std::vector<CallGraphNode *> &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction()) { + // Calls externally - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; + + bool KnowNothing = false; + unsigned FunctionEffect = 0; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. + for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FunctionEffect |= Ref; + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FR.MayReadAnyGlobal = true; + } else { + FunctionEffect |= ModRef; + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. 
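
//===-- Editor's note (standalone sketch, not LLVM API) --------------------===//
// AnalyzeCallGraph below walks SCCs bottom-up and folds each callee's effect
// into its callers.  A minimal model of that propagation, assuming the
// functions are already available in callee-first order (as the SCC traversal
// provides) and using a plain bitmask for the effect; the names and the
// Ref/Mod encoding here are illustrative only.
#include <map>
#include <string>
#include <vector>
#include <cstdio>

enum { EffNone = 0, EffRef = 1, EffMod = 2 };

struct Func {
  std::string Name;
  unsigned LocalEffect;                 // effect of its own loads/stores
  std::vector<const Func*> Callees;
};

int main() {
  Func Leaf = { "leaf", EffRef,  std::vector<const Func*>() };
  Func Mid  = { "mid",  EffMod,  std::vector<const Func*>() };
  Func Root = { "root", EffNone, std::vector<const Func*>() };
  Mid.Callees.push_back(&Leaf);
  Root.Callees.push_back(&Mid);

  // Callee-first order: every callee is processed before its callers.
  const Func *Order[] = { &Leaf, &Mid, &Root };
  std::map<const Func*, unsigned> Effect;
  for (unsigned i = 0; i != 3; ++i) {
    unsigned E = Order[i]->LocalEffect;
    for (unsigned c = 0, e = Order[i]->Callees.size(); c != e; ++c)
      E |= Effect[Order[i]->Callees[c]];          // fold callee effects in
    Effect[Order[i]] = E;
  }
  std::printf("root effect = %u (Ref|Mod expected)\n", Effect[&Root]);
  return 0;
}
//===----------------------------------------------------------------------===//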
+ KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { + // Propagate function effect up. + FunctionEffect |= CalleeFR->FunctionEffect; + + // Incorporate callee's effects on globals into our info. + for (std::map<const GlobalValue*, unsigned>::iterator GI = + CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); + GI != E; ++GI) + FR.GlobalInfo[GI->first] |= GI->second; + FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfo map. + if (KnowNothing) { + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (inst_iterator II = inst_begin(SCC[i]->getFunction()), + E = inst_end(SCC[i]->getFunction()); + II != E && FunctionEffect != ModRef; ++II) + if (isa<LoadInst>(*II)) { + FunctionEffect |= Ref; + if (cast<LoadInst>(*II).isVolatile()) + // Volatile loads may have side-effects, so mark them as writing + // memory (for example, a flag inside the processor). + FunctionEffect |= Mod; + } else if (isa<StoreInst>(*II)) { + FunctionEffect |= Mod; + if (cast<StoreInst>(*II).isVolatile()) + // Treat volatile stores as reading memory somewhere. + FunctionEffect |= Ref; + } else if (isMalloc(&cast<Instruction>(*II)) || + isFreeCall(&cast<Instruction>(*II))) { + FunctionEffect |= ModRef; + } + + if ((FunctionEffect & Mod) == 0) + ++NumReadMemFunctions; + if (FunctionEffect == 0) + ++NumNoMemFunctions; + FR.FunctionEffect = FunctionEffect; + + // Finally, now that we know the full effect on this SCC, clone the + // information to each function in the SCC. + for (unsigned i = 1, e = SCC.size(); i != e; ++i) + FunctionInfo[SCC[i]->getFunction()] = FR; + } +} + + + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasAnalysis::AliasResult +GlobalsModRef::alias(const Location &LocA, + const Location &LocB) { + // Get the base object these pointers point to. + const Value *UV1 = GetUnderlyingObject(LocA.Ptr); + const Value *UV2 = GetUnderlyingObject(LocB.Ptr); + + // If either of the underlying values is a global, they may be non-addr-taken + // globals, which we can answer queries about. + const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); + const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + + // If the two pointers are derived from two different non-addr-taken + // globals, or if one is and the other isn't, we know these can't alias. 
+ if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. + GV1 = GV2 = 0; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV1 = GV; + if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) + if (IndirectGlobals.count(GV)) + GV2 = GV; + + // These pointers may also be from an allocation for the indirect global. If + // so, also handle them. + if (AllocsForIndirectGlobals.count(UV1)) + GV1 = AllocsForIndirectGlobals[UV1]; + if (AllocsForIndirectGlobals.count(UV2)) + GV2 = AllocsForIndirectGlobals[UV2]; + + // Now that we know whether the two pointers are related to indirect globals, + // use this to disambiguate the pointers. If either pointer is based on an + // indirect global and if they are not both based on the same indirect global, + // they cannot alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + return AliasAnalysis::alias(LocA, LocB); +} + +AliasAnalysis::ModRefResult +GlobalsModRef::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + unsigned Known = ModRef; + + // If we are asking for mod/ref info of a direct call with a pointer to a + // global we are tracking, return information if we have it. + if (const GlobalValue *GV = + dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr))) + if (GV->hasLocalLinkage()) + if (const Function *F = CS.getCalledFunction()) + if (NonAddressTakenGlobals.count(GV)) + if (const FunctionRecord *FR = getFunctionInfo(F)) + Known = FR->getInfoForGlobal(GV); + + if (Known == NoModRef) + return NoModRef; // No need to query other mod/ref analyses + return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); +} + + +//===----------------------------------------------------------------------===// +// Methods to update the analysis as a result of the client transformation. +// +void GlobalsModRef::deleteValue(Value *V) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and remove + // any AllocRelatedValues for it. + if (IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (std::map<const Value*, const GlobalValue*>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); I != E; ) { + if (I->second == GV) { + AllocsForIndirectGlobals.erase(I++); + } else { + ++I; + } + } + } + } + } + + // Otherwise, if this is an allocation related to an indirect global, remove + // it. + AllocsForIndirectGlobals.erase(V); + + AliasAnalysis::deleteValue(V); +} + +void GlobalsModRef::copyValue(Value *From, Value *To) { + AliasAnalysis::copyValue(From, To); +} + +void GlobalsModRef::addEscapingUse(Use &U) { + // For the purposes of this analysis, it is conservatively correct to treat + // a newly escaping value equivalently to a deleted one. We could perhaps + // be more precise by processing the new use and attempting to update our + // saved analysis results to accommodate it. 
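
//===-- Editor's note (standalone sketch, not LLVM API) --------------------===//
// deleteValue above removes matching entries from a std::map while walking it,
// using the classic pre-C++11 idiom map.erase(I++): the iterator is advanced
// before the element it pointed to is erased, and std::map::erase invalidates
// only iterators to the erased element.  A self-contained illustration:
#include <map>
#include <cstdio>

int main() {
  std::map<int, char> M;
  for (int i = 0; i < 6; ++i)
    M[i] = (i % 2) ? 'o' : 'e';

  // Drop every entry whose value is 'o', keeping the traversal valid.
  for (std::map<int, char>::iterator I = M.begin(), E = M.end(); I != E; ) {
    if (I->second == 'o')
      M.erase(I++);          // advance first, then erase the old position
    else
      ++I;
  }
  std::printf("%zu entries remain\n", M.size());   // prints: 3 entries remain
  return 0;
}
//===----------------------------------------------------------------------===//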
+ deleteValue(U); + + AliasAnalysis::addEscapingUse(U); +} diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp new file mode 100644 index 0000000..0ba2e04 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -0,0 +1,29 @@ +//===-- IPA.cpp -----------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the IPA library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeIPA - Initialize all passes linked into the IPA library. +void llvm::initializeIPA(PassRegistry &Registry) { + initializeBasicCallGraphPass(Registry); + initializeCallGraphAnalysisGroup(Registry); + initializeFindUsedTypesPass(Registry); + initializeGlobalsModRefPass(Registry); +} + +void LLVMInitializeIPA(LLVMPassRegistryRef R) { + initializeIPA(*unwrap(R)); +} diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp new file mode 100644 index 0000000..e5f0a77 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -0,0 +1,272 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +using namespace llvm; + +char IVUsers::ID = 0; +INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", + "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(IVUsers, "iv-users", + "Induction Variable Users", false, true) + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE, LoopInfo *LI) { + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides unless they're + // only used outside the loop and we can simplify them. 
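
//===-- Editor's note (illustrative input, not part of this patch) ---------===//
// "Affine" addrecs are the common case IVUsers is after: expressions of the
// form Start + Iteration * Step with a loop-invariant Step, written
// {Start,+,Step} in SCEV notation.  A source-level sketch; the SCEV comments
// are the editor's annotation, not tool output.
void scale(int *A, int N) {
  for (int i = 0; i < N; ++i)   // i         is {0,+,1}
    A[4 * i + 3] *= 2;          // 4*i + 3   is {3,+,4}: affine, so interesting
}
//===----------------------------------------------------------------------===//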
+ if (AR->getLoop() == L) + return AR->isAffine() || + (!L->contains(I) && + SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. + return isInteresting(AR->getStart(), I, L, SE, LI) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI); + } + + // An add is interesting if exactly one of its operands is interesting. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + bool AnyInterestingYet = false; + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L, SE, LI)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; + } + + // Nothing else is interesting here. + return false; +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + // Also avoid creating IVs of non-native types. For example, we don't want a + // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. + uint64_t Width = SE->getTypeSizeInBits(I->getType()); + if (Width > 64 || (TD && !TD->isLegalInteger(Width))) + return false; + + if (!Processed.insert(I)) + return true; // Instruction already handled. + + // Get the symbolic expression for this instruction. + const SCEV *ISE = SE->getSCEV(I); + + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L, SE, LI)) + return false; + + SmallPtrSet<Instruction *, 4> UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast<Instruction>(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa<PHINode>(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references outside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa<PHINode>(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Autodetect the post-inc loop set, populating NewUse.PostIncLoops. 
+ // The regular return value here is discarded; instead of recording + // it, we just recompute it when we need it. + ISE = TransformForPostIncUse(NormalizeAutodetect, + ISE, User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + } + } + return true; +} + +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, User, Operand)); + return IVUses.back(); +} + +IVUsers::IVUsers() + : LoopPass(ID) { + initializeIVUsersPass(*PassRegistry::getPassRegistry()); +} + +void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LoopInfo>(); + AU.addRequired<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); + AU.setPreservesAll(); +} + +bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { + + L = l; + LI = &getAnalysis<LoopInfo>(); + DT = &getAnalysis<DominatorTree>(); + SE = &getAnalysis<ScalarEvolution>(); + TD = getAnalysisIfAvailable<TargetData>(); + + // Find all uses of induction variables in this loop, and categorize + // them by stride. Start by finding all of the PHI nodes in the header for + // this loop. If they are induction variables, inspect their uses. + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) + (void)AddUsersIfInteresting(I); + + return false; +} + +void IVUsers::print(raw_ostream &OS, const Module *M) const { + OS << "IV Users for loop "; + WriteAsOperand(OS, L->getHeader(), false); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << " with backedge-taken count " + << *SE->getBackedgeTakenCount(L); + } + OS << ":\n"; + + for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), + E = IVUses.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = " << *getReplacementExpr(*UI); + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } + OS << " in "; + UI->getUser()->print(OS); + OS << '\n'; + } +} + +void IVUsers::dump() const { + print(dbgs()); +} + +void IVUsers::releaseMemory() { + Processed.clear(); + IVUses.clear(); +} + +/// getReplacementExpr - Return a SCEV expression which computes the +/// value of the OperandValToReplace. +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { + return SE->getSCEV(IU.getOperandValToReplace()); +} + +/// getExpr - Return the expression for the use. 
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { + return + TransformForPostIncUse(Normalize, getReplacementExpr(IU), + IU.getUser(), IU.getOperandValToReplace(), + const_cast<PostIncLoopSet &>(IU.getPostIncLoops()), + *SE, *DT); +} + +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) + return AR->getStepRecurrence(*SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoops.insert(L); +} + +void IVStrideUse::deleted() { + // Remove this user from the list. + Parent->IVUses.erase(this); + // this now dangles! +} diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp new file mode 100644 index 0000000..efde598 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -0,0 +1,647 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/CallingConv.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SmallPtrSet.h" + +using namespace llvm; + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa<PHINode>(II)) continue; // PHI nodes don't count. 
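
//===-- Editor's note (standalone sketch, not LLVM API) --------------------===//
// The size metric below charges roughly one unit per instruction but skips
// things that are expected to be free after lowering (PHI nodes, debug
// intrinsics, no-op casts, GEPs with all-constant indices).  A toy version of
// that accounting over a tagged instruction list; the tags and the choice of
// what is free are illustrative, mirroring the checks that follow.
#include <cstdio>

enum Kind { Phi, DebugIntrinsic, NoopCast, ConstGEP, Load, Store, Call, Add };

static bool isFree(Kind K) {
  return K == Phi || K == DebugIntrinsic || K == NoopCast || K == ConstGEP;
}

int main() {
  const Kind Block[] = { Phi, ConstGEP, Load, Add, NoopCast, Store, Call };
  const unsigned Total = sizeof(Block) / sizeof(Block[0]);
  unsigned NumInsts = 0;
  for (unsigned i = 0; i != Total; ++i)
    if (!isFree(Block[i]))
      ++NumInsts;                      // only instructions with a real cost
  std::printf("counted %u of %u\n", NumInsts, Total);   // counted 4 of 7
  return 0;
}
//===----------------------------------------------------------------------===//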
+ + // Special handling for calls. + if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + if (isa<DbgInfoIntrinsic>(II)) + continue; // Debug intrinsics don't count as size. + + ImmutableCallSite CS(cast<Instruction>(II)); + + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + if (const CastInst *CI = dyn_cast<CastInst>(II)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + isa<PtrToIntInst>(CI)) + continue; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + continue; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (GEPI->hasAllConstantIndices()) + continue; + } + + ++NumInsts; + } + + if (isa<ReturnInst>(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + if (isa<IndirectBrInst>(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { + // We will be able to eliminate all but one of the successors. + const TerminatorInst &TI = cast<TerminatorInst>(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += NumBBInsts[TI.getSuccessor(I)]; + // We don't know which blocks will be eliminated, so use the average size. 
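
// Editor's note (worked example, illustrative numbers): with InstrCost = 5 and
// a conditional branch whose two successors hold 10 and 30 instructions
// (Instrs = 40, NumSucc = 2), folding the condition to a constant removes all
// but one successor, and the average saving credited below is
//   5 * 40 * (2 - 1) / 2 = 100 cost units,
// i.e. the expected share of the successor code that becomes dead.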
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast<Instruction>(*U); + + // We can't constant propagate instructions which have effects or + // read memory. + // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocaInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += InlineConstants::InstrCost; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + } + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { + if (!V->getType()->isPointerTy()) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast<Instruction>(*UI); + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += InlineConstants::InstrCost; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + // Track pointer through bitcasts. + Reduction += CountCodeReductionForAlloca(BCI); + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void CodeMetrics::analyzeFunction(Function *F) { + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (F->callsFunctionThatReturnsTwice()) + callsSetJmp = true; + + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB); +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { + Metrics.analyzeFunction(F); + + // A function with exactly one return has it removed during the inlining + // process (see InlineFunction), so don't count it. + // FIXME: This knowledge should really be encoded outside of FunctionInfo. + if (Metrics.NumRets==1) + --Metrics.NumInsts; + + // Check out all of the arguments to the function, figuring out how much + // code can be eliminated if one of the arguments is a constant. 
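
//===-- Editor's note (illustrative input, not part of this patch) ---------===//
// Why an alloca argument is such a strong hint (CountCodeReductionForAlloca
// above): once the callee is inlined, a local whose address no longer escapes
// can typically be promoted to registers and its loads and stores folded away.
// A source-level sketch with editor-chosen names:
struct Point { int x, y; };

static int lengthSq(const Point *P) {     // callee: only loads through P
  return P->x * P->x + P->y * P->y;
}

int caller(int a, int b) {
  Point Tmp = { a, b };                   // stack object (an alloca in the IR)
  return lengthSq(&Tmp);                  // after inlining, Tmp can be
}                                         // scalarized and kept in registers
//===----------------------------------------------------------------------===//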
+ ArgumentWeights.reserve(F->arg_size()); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), + Metrics.CountCodeReductionForAlloca(I))); +} + +/// NeverInline - returns true if the function should never be inlined into +/// any caller +bool InlineCostAnalyzer::FunctionInfo::NeverInline() { + return (Metrics.callsSetJmp || Metrics.isRecursive || + Metrics.containsIndirectBr); +} +// getSpecializationBonus - The heuristic used to determine the per-call +// performance boost for using a specialization of Callee with argument +// specializedArgNo replaced by a constant. +int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, + SmallVectorImpl<unsigned> &SpecializedArgNos) +{ + if (Callee->mayBeOverridden()) + return 0; + + int Bonus = 0; + // If this function uses the coldcc calling convention, prefer not to + // specialize it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus -= InlineConstants::ColdccPenalty; + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + unsigned ArgNo = 0; + unsigned i = 0; + for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end(); + I != E; ++I, ++ArgNo) + if (ArgNo == SpecializedArgNos[i]) { + ++i; + Bonus += CountBonusForConstant(I); + } + + // Calls usually take a long time, so they make the specialization gain + // smaller. + Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + return Bonus; +} + +// ConstantFunctionBonus - Figure out how much of a bonus we can get for +// possibly devirtualizing a function. We'll subtract the size of the function +// we may wish to inline from the indirect call bonus providing a limit on +// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize +// inlining because we decide we don't want to give a bonus for +// devirtualizing. +int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { + + // This could just be NULL. + if (!C) return 0; + + Function *F = dyn_cast<Function>(C); + if (!F) return 0; + + int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); + return (Bonus > 0) ? 0 : Bonus; +} + +// CountBonusForConstant - Figure out an approximation for how much per-call +// performance boost we can expect if the specified value is constant. +int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { + unsigned Bonus = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (CallInst *CI = dyn_cast<CallInst>(U)) { + // Turning an indirect call into a direct call is a BIG win + if (CI->getCalledValue() == V) + Bonus += ConstantFunctionBonus(CallSite(CI), C); + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + // Turning an indirect call into a direct call is a BIG win + if (II->getCalledValue() == V) + Bonus += ConstantFunctionBonus(CallSite(II), C); + } + // FIXME: Eliminating conditional branches and switches should + // also yield a per-call performance boost. + else { + // Figure out the bonuses that wll accrue due to simple constant + // propagation. + Instruction &Inst = cast<Instruction>(*U); + + // We can't constant propagate instructions which have effects or + // read memory. 
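
//===-- Editor's note (illustrative input, not part of this patch) ---------===//
// The "BIG win" case above, at the source level: when the function-pointer
// argument turns out to be a known constant at a call site, the indirect call
// inside the callee becomes a direct call that can itself be inlined.  Names
// are the editor's, chosen for illustration only.
static int square(int x) { return x * x; }

static int apply(int (*Fn)(int), int V) {
  return Fn(V);                 // indirect call while Fn is unknown
}

int user(int V) {
  return apply(square, V);      // Fn is the constant 'square' here, so after
}                               // inlining apply, Fn(V) is a direct call
//===----------------------------------------------------------------------===//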
+ // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocaInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) + Bonus += CountBonusForConstant(&Inst); + } + } + + return Bonus; +} + +int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + // InlineCost - This value measures how good of an inline candidate this call + // site is to inline. A lower inline cost make is more likely for the call to + // be inlined. This value may go negative. + // + int InlineCost = 0; + + // Compute any size reductions we can expect due to arguments being passed into + // the function. + // + unsigned ArgNo = 0; + CallSite::arg_iterator I = CS.arg_begin(); + for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); + FI != FE; ++I, ++FI, ++ArgNo) { + + // If an alloca is passed in, inlining this function is likely to allow + // significant future optimization possibilities (like scalar promotion, and + // scalarization), so encourage the inlining of the function. + // + if (isa<AllocaInst>(I)) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; + + // If this is a constant being passed into the function, use the argument + // weights calculated for the callee to determine how much will be folded + // away with this information. + else if (isa<Constant>(I)) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; + } + + // Each argument passed in has a cost at both the caller and the callee + // sides. Measurements show that each argument costs about the same as an + // instruction. + InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Calls usually take a long time, so they make the inlining gain smaller. + InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + // Look at the size of the callee. Each instruction counts as 5. + InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; + + return InlineCost; +} + +int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + bool isDirectCall = CS.getCalledFunction() == Callee; + Instruction *TheCall = CS.getInstruction(); + int Bonus = 0; + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. 
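
// Editor's note (worked example, illustrative numbers): for a callee with 40
// instructions and 2 calls, invoked with 3 arguments of which one constant
// argument carries a folding weight of 30, getInlineSize above computes
//   40 * 5 (body) + 2 * CallPenalty (calls) - 30 (constant arg) - 3 * 5 (args)
// so with an assumed CallPenalty of 25 the call site scores 205; a lower
// score marks the site as a better inlining candidate.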
+ // + if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) + Bonus += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this. + if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { + if (isa<UnreachableInst>(II->getNormalDest()->begin())) + Bonus += InlineConstants::NoreturnPenalty; + } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) + Bonus += InlineConstants::NoreturnPenalty; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus += InlineConstants::ColdccPenalty; + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + CallSite::arg_iterator I = CS.arg_begin(); + for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); + FI != FE; ++I, ++FI) + // Compute any constant bonus due to inlining we want to give here. + if (isa<Constant>(I)) + Bonus += CountBonusForConstant(FI, cast<Constant>(I)); + + return Bonus; +} + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. +// +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + SmallPtrSet<const Function*, 16> &NeverInline) { + return getInlineCost(CS, CS.getCalledFunction(), NeverInline); +} + +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + Function *Callee, + SmallPtrSet<const Function*, 16> &NeverInline) { + Instruction *TheCall = CS.getInstruction(); + Function *Caller = TheCall->getParent()->getParent(); + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. + if (Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || + CS.isNoInline()) + return llvm::InlineCost::getNever(); + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + // If we should never inline this, return a huge cost. + if (CalleeFI->NeverInline()) + return InlineCost::getNever(); + + // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we + // could move this up and avoid computing the FunctionInfo for + // things we are going to just return always inline for. This + // requires handling setjmp somewhere else, however. + if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) + return InlineCost::getAlways(); + + if (CalleeFI->Metrics.usesDynamicAlloca) { + // Get information about the caller. + FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; + + // If we haven't calculated this information yet, do so now. + if (CallerFI.Metrics.NumBlocks == 0) { + CallerFI.analyzeFunction(Caller); + + // Recompute the CalleeFI pointer, getting Caller could have invalidated + // it. + CalleeFI = &CachedFunctionInfo[Callee]; + } + + // Don't inline a callee with dynamic alloca into a caller without them. 
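
//===-- Editor's note (illustrative input, not part of this patch) ---------===//
// What goes wrong if this rule is ignored: memory from a dynamic alloca is
// only reclaimed when the function returns, so inlining scratch() into the
// loop below would grow loop()'s stack frame on every iteration.  This sketch
// assumes a platform that declares alloca() in <alloca.h>.
#include <alloca.h>
#include <cstring>

static void scratch(unsigned N) {
  char *Buf = static_cast<char *>(alloca(N));   // dynamic (non-static) alloca
  std::memset(Buf, 0, N);
}

void loop(unsigned N) {
  for (unsigned i = 0; i < 1000; ++i)
    scratch(N);             // fine as a call; risky if inlined here
}
//===----------------------------------------------------------------------===//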
+ // Functions containing dynamic alloca's are inefficient in various ways; + // don't create more inefficiency. + if (!CallerFI.Metrics.usesDynamicAlloca) + return InlineCost::getNever(); + } + + // InlineCost - This value measures how good of an inline candidate this call + // site is to inline. A lower inline cost make is more likely for the call to + // be inlined. This value may go negative due to the fact that bonuses + // are negative numbers. + // + int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee); + return llvm::InlineCost::get(InlineCost); +} + +// getSpecializationCost - The heuristic used to determine the code-size +// impact of creating a specialized version of Callee with argument +// SpecializedArgNo replaced by a constant. +InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, + SmallVectorImpl<unsigned> &SpecializedArgNos) +{ + // Don't specialize functions which can be redefined at link-time to mean + // something else. + if (Callee->mayBeOverridden()) + return llvm::InlineCost::getNever(); + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + int Cost = 0; + + // Look at the original size of the callee. Each instruction counts as 5. + Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; + + // Offset that with the amount of code that can be constant-folded + // away with the given arguments replaced by constants. + for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(), + ae = SpecializedArgNos.end(); an != ae; ++an) + Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; + + return llvm::InlineCost::get(Cost); +} + +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee. + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.Metrics.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + float Factor = 1.0f; + // Single BB functions are often written to be inlined. + if (CalleeFI.Metrics.NumBlocks == 1) + Factor += 0.5f; + + // Be more aggressive if the function contains a good chunk (if it mades up + // at least 10% of the instructions) of vector instructions. + if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) + Factor += 2.0f; + else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) + Factor += 1.5f; + return Factor; +} + +/// growCachedCostInfo - update the cached cost info for Caller after Callee has +/// been inlined. +void +InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { + CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; + + // For small functions we prefer to recalculate the cost for better accuracy. + if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { + resetCachedCostInfo(Caller); + return; + } + + // For large functions, we can save a lot of computation time by skipping + // recalculations. 
+ if (CallerMetrics.NumCalls > 0) + --CallerMetrics.NumCalls; + + if (Callee == 0) return; + + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + + // If we don't have metrics for the callee, don't recalculate them just to + // update an approximation in the caller. Instead, just recalculate the + // caller info from scratch. + if (CalleeMetrics.NumBlocks == 0) { + resetCachedCostInfo(Caller); + return; + } + + // Since CalleeMetrics were already calculated, we know that the CallerMetrics + // reference isn't invalidated: both were in the DenseMap. + CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; + + // FIXME: If any of these three are true for the callee, the callee was + // not inlined into the caller, so I think they're redundant here. + CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; + CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; + CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; + + CallerMetrics.NumInsts += CalleeMetrics.NumInsts; + CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; + CallerMetrics.NumCalls += CalleeMetrics.NumCalls; + CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; + CallerMetrics.NumRets += CalleeMetrics.NumRets; + + // analyzeBasicBlock counts each function argument as an inst. + if (CallerMetrics.NumInsts >= Callee->arg_size()) + CallerMetrics.NumInsts -= Callee->arg_size(); + else + CallerMetrics.NumInsts = 0; + + // We are not updating the argument weights. We have already determined that + // Caller is a fairly large function, so we accept the loss of precision. +} + +/// clear - empty the cache of inline costs +void InlineCostAnalyzer::clear() { + CachedFunctionInfo.clear(); +} diff --git a/contrib/llvm/lib/Analysis/InstCount.cpp b/contrib/llvm/lib/Analysis/InstCount.cpp new file mode 100644 index 0000000..3b385d2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstCount.cpp @@ -0,0 +1,87 @@ +//===-- InstCount.cpp - Collects the count of all instructions ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass collects the count of all instructions and reports them +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instcount" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(TotalInsts , "Number of instructions (of all types)"); +STATISTIC(TotalBlocks, "Number of basic blocks"); +STATISTIC(TotalFuncs , "Number of non-external functions"); +STATISTIC(TotalMemInst, "Number of memory instructions"); + +#define HANDLE_INST(N, OPCODE, CLASS) \ + STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); + +#include "llvm/Instruction.def" + + +namespace { + class InstCount : public FunctionPass, public InstVisitor<InstCount> { + friend class InstVisitor<InstCount>; + + void visitFunction (Function &F) { ++TotalFuncs; } + void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; } + +#define HANDLE_INST(N, OPCODE, CLASS) \ + void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } + +#include "llvm/Instruction.def" + + void visitInstruction(Instruction &I) { + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(0); + } + public: + static char ID; // Pass identification, replacement for typeid + InstCount() : FunctionPass(ID) { + initializeInstCountPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + }; +} + +char InstCount::ID = 0; +INITIALIZE_PASS(InstCount, "instcount", + "Counts the various types of Instructions", false, true) + +FunctionPass *llvm::createInstCountPass() { return new InstCount(); } + +// InstCount::run - This is the main Analysis entry point for a +// function. +// +bool InstCount::runOnFunction(Function &F) { + unsigned StartMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + visit(F); + unsigned EndMemInsts = + NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst + + NumInvokeInst + NumAllocaInst; + TotalMemInst += EndMemInsts-StartMemInsts; + return false; +} diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp new file mode 100644 index 0000000..8709f6b --- /dev/null +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -0,0 +1,2526 @@ +//===- InstructionSimplify.cpp - Fold instruction operands ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements routines for folding instructions into simpler forms +// that do not require creating new instructions. This does constant folding +// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either +// returning a constant ("and i32 %x, 0" -> "0") or an already existing value +// ("and i32 %x, %x" -> "%x"). 
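Before the InstructionSimplify.cpp header continues, a note on the InstCount pass above: it gets one statistic and one visitor method per opcode by re-including llvm/Instruction.def under different definitions of HANDLE_INST. The standalone sketch below shows the same X-macro pattern with a made-up three-opcode list; every name in it is hypothetical and nothing is taken from the LLVM headers.

#include <cstdio>

// A made-up three-opcode list; llvm/Instruction.def plays this role above.
#define MY_OPCODES(X) \
  X(Load)             \
  X(Store)            \
  X(Add)

// First expansion: one counter per opcode, like the STATISTIC declarations.
#define DECLARE_COUNTER(OP) static unsigned Num##OP##Inst = 0;
MY_OPCODES(DECLARE_COUNTER)
#undef DECLARE_COUNTER

enum class Opcode { Load, Store, Add };

// Second expansion: one case per opcode, like the visit##OPCODE methods.
static void count(Opcode Op) {
  switch (Op) {
#define COUNT_CASE(OP) case Opcode::OP: ++Num##OP##Inst; break;
  MY_OPCODES(COUNT_CASE)
#undef COUNT_CASE
  }
}

int main() {
  count(Opcode::Load);
  count(Opcode::Load);
  count(Opcode::Add);
  std::printf("loads=%u stores=%u adds=%u\n",
              NumLoadInst, NumStoreInst, NumAddInst); // loads=2 stores=0 adds=1
  return 0;
}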
All operands are assumed to have already been +// simplified: This is usually true and assuming it simplifies the logic (if +// they have not been simplified then results are correct but maybe suboptimal). +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instsimplify" +#include "llvm/Operator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +enum { RecursionLimit = 3 }; + +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor , "Number of factorizations"); +STATISTIC(NumReassoc, "Number of reassociations"); + +static Value *SimplifyAndInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyOrInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const TargetData *, + const DominatorTree *, unsigned); + +/// ValueDominatesPHI - Does the given value dominate the specified phi node? +static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) + // Arguments and constants dominate all instructions. + return true; + + // If we have a DominatorTree then do a precise test. + if (DT) + return DT->dominates(I, P); + + // Otherwise, if the instruction is in the entry block, and is not an invoke, + // then it obviously dominates all phi nodes. + if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && + !isa<InvokeInst>(I)) + return true; + + return false; +} + +/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning +/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is +/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS. +/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". +/// Returns the simplified value, or null if no simplification was performed. +static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExpand, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Check whether the expression has the form "(A op' B) op C". + if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) + if (Op0->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op C) op' (B op C)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; + // Do "A op C" and "B op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. 
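A quick aside on a pattern used by every helper in this file: each one threads a MaxRecurse budget and opens with the same guard, "if (!MaxRecurse--) return 0;", so a single query does at most RecursionLimit levels of nested work. A minimal sketch of that idiom follows, with hypothetical names and a depth counter standing in for a real simplification attempt.

#include <cstdio>

// Hypothetical names; a counter stands in for a real simplification attempt.
static const unsigned RecursionLimit = 3;

static int search(unsigned MaxRecurse, int Depth) {
  // Same guard as the helpers above: give up once the budget is spent.
  if (!MaxRecurse--)
    return Depth;
  return search(MaxRecurse, Depth + 1);
}

int main() {
  std::printf("stopped after %d recursive steps\n", search(RecursionLimit, 0));
  return 0;                    // prints: stopped after 3 recursive steps
}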
+ if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) + && L == B && R == A)) { + ++NumExpand; + return LHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, + MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + // Check whether the expression has the form "A op (B op' C)". + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS)) + if (Op1->getOpcode() == OpcodeToExpand) { + // It does! Try turning it into "(A op B) op' (A op C)". + Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); + // Do "A op B" and "A op C" both simplify? + if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) { + // They do! Return "L op' R" if it simplifies or is already available. + // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. + if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) + && L == C && R == B)) { + ++NumExpand; + return RHS; + } + // Otherwise return "L op' R" if it simplifies. + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT, + MaxRecurse)) { + ++NumExpand; + return V; + } + } + } + + return 0; +} + +/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term +/// using the operation OpCodeToExtract. For example, when Opcode is Add and +/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". +/// Returns the simplified value, or null if no simplification was performed. +static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, + unsigned OpcToExtract, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + if (!Op0 || Op0->getOpcode() != OpcodeToExtract || + !Op1 || Op1->getOpcode() != OpcodeToExtract) + return 0; + + // The expression has the form "(A op' B) op (C op' D)". + Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); + Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); + + // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)". + // Does the instruction have the form "(A op' B) op (A op' D)" or, in the + // commutative case, "(A op' B) op (C op' A)"? + if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) { + Value *DD = A == C ? D : C; + // Form "A op' (B op DD)" if it simplifies completely. + // Does "B op DD" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) { + // It does! Return "A op' V" if it simplifies or is already available. + // If V equals B then "A op' V" is just the LHS. If V equals DD then + // "A op' V" is just the RHS. + if (V == B || V == DD) { + ++NumFactor; + return V == B ? LHS : RHS; + } + // Otherwise return "A op' V" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)". + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) { + Value *CC = B == D ? 
C : D; + // Form "(A op CC) op' B" if it simplifies completely.. + // Does "A op CC" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) { + // It does! Return "V op' B" if it simplifies or is already available. + // If V equals A then "V op' B" is just the LHS. If V equals CC then + // "V op' B" is just the RHS. + if (V == A || V == CC) { + ++NumFactor; + return V == A ? LHS : RHS; + } + // Otherwise return "V op' B" if it simplifies. + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) { + ++NumFactor; + return W; + } + } + } + + return 0; +} + +/// SimplifyAssociativeBinOp - Generic simplifications for associative binary +/// operations. Returns the simpler value, or null if none was found. +static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, + const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; + assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); + + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); + + // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "B op C" simplify? + if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) { + // It does! Return "A op V" if it simplifies or is already available. + // If V equals B then "A op V" is just the LHS. + if (V == B) return LHS; + // Otherwise return "A op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "A op B" simplify? + if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) { + // It does! Return "V op C" if it simplifies or is already available. + // If V equals B then "V op C" is just the RHS. + if (V == B) return RHS; + // Otherwise return "V op C" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // The remaining transforms require commutativity as well as associativity. + if (!Instruction::isCommutative(Opcode)) + return 0; + + // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. + if (Op0 && Op0->getOpcode() == Opcode) { + Value *A = Op0->getOperand(0); + Value *B = Op0->getOperand(1); + Value *C = RHS; + + // Does "C op A" simplify? + if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + // It does! Return "V op B" if it simplifies or is already available. + // If V equals A then "V op B" is just the LHS. + if (V == A) return LHS; + // Otherwise return "V op B" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely. + if (Op1 && Op1->getOpcode() == Opcode) { + Value *A = LHS; + Value *B = Op1->getOperand(0); + Value *C = Op1->getOperand(1); + + // Does "C op A" simplify? 
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) { + // It does! Return "B op V" if it simplifies or is already available. + // If V equals C then "B op V" is just the RHS. + if (V == C) return RHS; + // Otherwise return "B op V" if it simplifies. + if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) { + ++NumReassoc; + return W; + } + } + } + + return 0; +} + +/// ThreadBinOpOverSelect - In the case of a binary operation with a select +/// instruction as an operand, try to simplify the binop by seeing whether +/// evaluating it on both branches of the select results in the same value. +/// Returns the common value if so, otherwise returns null. +static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + SelectInst *SI; + if (isa<SelectInst>(LHS)) { + SI = cast<SelectInst>(LHS); + } else { + assert(isa<SelectInst>(RHS) && "No select instruction operand!"); + SI = cast<SelectInst>(RHS); + } + + // Evaluate the BinOp on the true and false branches of the select. + Value *TV; + Value *FV; + if (SI == LHS) { + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse); + } else { + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse); + } + + // If they simplified to the same value, then return the common value. + // If they both failed to simplify then return null. + if (TV == FV) + return TV; + + // If one branch simplified to undef, return the other one. + if (TV && isa<UndefValue>(TV)) + return FV; + if (FV && isa<UndefValue>(FV)) + return TV; + + // If applying the operation did not change the true and false select values, + // then the result of the binop is the select itself. + if (TV == SI->getTrueValue() && FV == SI->getFalseValue()) + return SI; + + // If one branch simplified and the other did not, and the simplified + // value is equal to the unsimplified one, return the simplified value. + // For example, select (cond, X, X & Z) & Z -> X & Z. + if ((FV && !TV) || (TV && !FV)) { + // Check that the simplified value has the form "X op Y" where "op" is the + // same as the original operation. + Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV); + if (Simplified && Simplified->getOpcode() == Opcode) { + // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS". + // We already know that "op" is the same as for the simplified value. See + // if the operands match too. If so, return the simplified value. + Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue(); + Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS; + Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch; + if (Simplified->getOperand(0) == UnsimplifiedLHS && + Simplified->getOperand(1) == UnsimplifiedRHS) + return Simplified; + if (Simplified->isCommutative() && + Simplified->getOperand(1) == UnsimplifiedLHS && + Simplified->getOperand(0) == UnsimplifiedRHS) + return Simplified; + } + } + + return 0; +} + +/// ThreadCmpOverSelect - In the case of a comparison with a select instruction, +/// try to simplify the comparison by seeing whether both branches of the select +/// result in the same value. 
Returns the common value if so, otherwise returns +/// null. +static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, + Value *RHS, const TargetData *TD, + const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Make sure the select is on the LHS. + if (!isa<SelectInst>(LHS)) { + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!"); + SelectInst *SI = cast<SelectInst>(LHS); + + // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. + // Does "cmp TV, RHS" simplify? + if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! Does "cmp FV, RHS" simplify? + if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! If they simplified to the same value, then use it as the + // result of the original comparison. + if (TCmp == FCmp) + return TCmp; + Value *Cond = SI->getCondition(); + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + TD, DT, MaxRecurse)) + return V; + } + } + + return 0; +} + +/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that +/// is a PHI instruction, try to simplify the binop by seeing whether evaluating +/// it on the incoming phi values yields the same result for every value. If so +/// returns the common value, otherwise returns null. +static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + PHINode *PI; + if (isa<PHINode>(LHS)) { + PI = cast<PHINode>(LHS); + // Bail out if RHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(RHS, PI, DT)) + return 0; + } else { + assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); + PI = cast<PHINode>(RHS); + // Bail out if LHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(LHS, PI, DT)) + return 0; + } + + // Evaluate the BinOp on the incoming phi values. + Value *CommonValue = 0; + for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PI->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PI) continue; + Value *V = PI == LHS ? + SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) : + SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse); + // If the operation failed to simplify, or simplified to a different value + // to previously, then give up. 
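The phi-threading helpers above rest on one observation: if applying the operation to every incoming value of the phi yields a single common result, then that result is the value of the whole expression no matter which predecessor executed. A toy check of that claim, with concrete integers standing in for IR values, is sketched below.

#include <cassert>
#include <vector>

int main() {
  // Hypothetical incoming values of a phi node.
  std::vector<int> Incoming = {3, -3, 3};
  // The operation being threaded: "incoming * 0".
  int Common = Incoming[0] * 0;
  for (int V : Incoming)
    assert(V * 0 == Common);   // every incoming value gives the same result
  assert(Common == 0);         // so "phi * 0" is simply 0
  return 0;
}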
+ if (!V || (CommonValue && V != CommonValue)) + return 0; + CommonValue = V; + } + + return CommonValue; +} + +/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try +/// try to simplify the comparison by seeing whether comparing with all of the +/// incoming phi values yields the same result every time. If so returns the +/// common result, otherwise returns null. +static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + // Recursion is always used, so bail out at once if we already hit the limit. + if (!MaxRecurse--) + return 0; + + // Make sure the phi is on the LHS. + if (!isa<PHINode>(LHS)) { + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!"); + PHINode *PI = cast<PHINode>(LHS); + + // Bail out if RHS and the phi may be mutually interdependent due to a loop. + if (!ValueDominatesPHI(RHS, PI, DT)) + return 0; + + // Evaluate the BinOp on the incoming phi values. + Value *CommonValue = 0; + for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PI->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PI) continue; + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse); + // If the operation failed to simplify, or simplified to a different value + // to previously, then give up. + if (!V || (CommonValue && V != CommonValue)) + return 0; + CommonValue = V; + } + + return CommonValue; +} + +/// SimplifyAddInst - Given operands for an Add, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X + undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // X + 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X + (Y - X) -> Y + // (Y - X) + X -> Y + // Eg: X + -X -> 0 + Value *Y = 0; + if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || + match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) + return Y; + + // X + ~X -> -1 since ~X = -X-1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + /// i1 add -> xor. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, + TD, DT, MaxRecurse)) + return V; + + // Threading Add over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A + select(cond, B, C)" means evaluating + // "A+B" and "A+C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. 
If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifySubInst - Given operands for a Sub, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), + Ops, 2, TD); + } + + // X - undef -> undef + // undef - X -> undef + if (match(Op0, m_Undef()) || match(Op1, m_Undef())) + return UndefValue::get(Op0->getType()); + + // X - 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X - X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // (X*2) - X -> X + // (X<<1) - X -> X + Value *X = 0; + if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || + match(Op0, m_Shl(m_Specific(Op1), m_One()))) + return Op1; + + // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. + // For example, (X + Y) - Y -> X; (Y + X) - Y -> X + Value *Y = 0, *Z = Op1; + if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z + // See if "V === Y - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "X + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "Y + V" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + } + + // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies. + // For example, X - (X + 1) -> -1 + X = Op0; + if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1)) + // It does! Now see if "V - Z" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + // See if "V === X - Z" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1)) + // It does! Now see if "V - Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + } + + // Z - (X - Y) -> (Z - X) + Y if everything simplifies. + // For example, X - (X - Y) -> Y. + Z = Op0; + if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) + // See if "V === Z - X" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1)) + // It does! 
Now see if "V + Y" simplifies. + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT, + MaxRecurse-1)) { + // It does, we successfully reassociated! + ++NumReassoc; + return W; + } + + // Mul distributes over Sub. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, + TD, DT, MaxRecurse)) + return V; + + // i1 sub -> xor. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Threading Sub over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A - select(cond, B, C)" means evaluating + // "A-B" and "A-C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifyMulInst - Given operands for a Mul, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X * undef -> 0 + if (match(Op1, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // X * 0 -> 0 + if (match(Op1, m_Zero())) + return Op1; + + // X * 1 -> X + if (match(Op1, m_One())) + return Op0; + + // (X / Y) * Y -> X if the division is exact. + Value *X = 0, *Y = 0; + if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y + (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y) + BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1); + if (Div->isExact()) + return X; + } + + // i1 mul -> and. + if (MaxRecurse && Op0->getType()->isIntegerTy(1)) + if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1)) + return V; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Mul distributes over Add. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. 
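A small worked example of the "(X / Y) * Y -> X" rule in SimplifyMulInst above: it only fires when the division is exact, because an inexact division discards the remainder and the multiplication cannot recover it.

#include <cassert>

int main() {
  int X = 24, Y = 6;
  assert(X % Y == 0);         // exact division: nothing is discarded
  assert((X / Y) * Y == X);   // so (X / Y) * Y really is X

  int Z = 25;                 // inexact case: the rule must not fire
  assert((Z / Y) * Y != Z);   // (25 / 6) * 6 == 24, not 25
  return 0;
}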
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + bool isSigned = Opcode == Instruction::SDiv; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef / X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 / X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X / 1 -> X + if (match(Op1, m_One())) + return Op0; + + if (Op0->getType()->isIntegerTy(1)) + // It can't be division by zero, hence it must be division by one. + return Op0; + + // X / X -> 1 + if (Op0 == Op1) + return ConstantInt::get(Op0->getType(), 1); + + // (X * Y) / Y -> X if the multiplication does not overflow. + Value *X = 0, *Y = 0; + if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { + if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 + BinaryOperator *Mul = cast<BinaryOperator>(Op0); + // If the Mul knows it does not overflow, then we are good to go. + if ((isSigned && Mul->hasNoSignedWrap()) || + (!isSigned && Mul->hasNoUnsignedWrap())) + return X; + // If X has the form X = A / Y then X * Y cannot overflow. + if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X)) + if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y) + return X; + } + + // (X rem Y) / Y -> 0 + if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || + (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySDivInst - Given operands for an SDiv, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyUDivInst - Given operands for a UDiv, see if we can +/// fold the result. If not, this returns null. 
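Among the division rules above, "(X rem Y) / Y -> 0" holds for unsigned values because a remainder is always strictly smaller than a non-zero Y. A brute-force spot check over small values (illustration only):

#include <cassert>

int main() {
  for (unsigned X = 0; X < 100; ++X)
    for (unsigned Y = 1; Y < 20; ++Y)
      assert((X % Y) / Y == 0u);   // the remainder is always less than Y
  return 0;
}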
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef / X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X / undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyRem - Given operands for an SRem or URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef % X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 % X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X % 0 -> undef, we don't need to preserve faults! + if (match(Op1, m_Zero())) + return UndefValue::get(Op0->getType()); + + // X % 1 -> 0 + if (match(Op1, m_One())) + return Constant::getNullValue(Op0->getType()); + + if (Op0->getType()->isIntegerTy(1)) + // It can't be remainder by zero, hence it must be remainder by one. + return Constant::getNullValue(Op0->getType()); + + // X % X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySRemInst - Given operands for an SRem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyURemInst - Given operands for a URem, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + // 0 shift by X -> 0 + if (match(Op0, m_Zero())) + return Op0; + + // X shift by 0 -> X + if (match(Op1, m_Zero())) + return Op0; + + // X shift by undef -> undef because it may shift by the bitwidth. + if (match(Op1, m_Undef())) + return Op1; + + // Shifting by the bitwidth or more is undefined. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) + if (CI->getValue().getLimitedValue() >= + Op0->getType()->getScalarSizeInBits()) + return UndefValue::get(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifyShlInst - Given operands for an Shl, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // undef << X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X >> A) << A -> X + Value *X; + if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) && + cast<PossiblyExactOperator>(Op0)->isExact()) + return X; + return 0; +} + +Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit); +} + +/// SimplifyLShrInst - Given operands for an LShr, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // undef >>l X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap()) + return X; + + return 0; +} + +Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); +} + +/// SimplifyAShrInst - Given operands for an AShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // all ones >>a X -> all ones + if (match(Op0, m_AllOnes())) + return Op0; + + // undef >>a X -> all ones + if (match(Op0, m_Undef())) + return Constant::getAllOnesValue(Op0->getType()); + + // (X << A) >> A -> X + Value *X; + if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && + cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) + return X; + + return 0; +} + +Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit); +} + +/// SimplifyAndInst - Given operands for an And, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X & undef -> 0 + if (match(Op1, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // X & X = X + if (Op0 == Op1) + return Op0; + + // X & 0 = 0 + if (match(Op1, m_Zero())) + return Op1; + + // X & -1 = X + if (match(Op1, m_AllOnes())) + return Op0; + + // A & ~A = ~A & A = 0 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getNullValue(Op0->getType()); + + // (A | ?) & A = A + Value *A = 0, *B = 0; + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A & (A | ?) = A + if (match(Op1, m_Or(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, + TD, DT, MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, + TD, DT, MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. 
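The "(X << A) >> A -> X" rules above insist on a no-wrap (or exact) flag because a left shift that discards set high bits cannot be undone by shifting back down. The 8-bit sketch below shows both the safe and the unsafe case; the width and values are arbitrary and chosen only for illustration.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned A = 3;

  uint8_t X1 = 0x0B;    // high bits clear: the shift loses nothing ("nuw")
  assert((uint8_t)((uint8_t)(X1 << A) >> A) == X1);

  uint8_t X2 = 0xF0;    // the left shift discards set bits
  assert((uint8_t)((uint8_t)(X2 << A) >> A) != X2);
  return 0;
}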
+ if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyOrInst - Given operands for an Or, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // X | undef -> -1 + if (match(Op1, m_Undef())) + return Constant::getAllOnesValue(Op0->getType()); + + // X | X = X + if (Op0 == Op1) + return Op0; + + // X | 0 = X + if (match(Op1, m_Zero())) + return Op0; + + // X | -1 = -1 + if (match(Op1, m_AllOnes())) + return Op1; + + // A | ~A = ~A | A = -1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + // (A & ?) | A = A + Value *A = 0, *B = 0; + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (A == Op1 || B == Op1)) + return Op1; + + // A | (A & ?) = A + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (A == Op0 || B == Op0)) + return Op0; + + // ~(A & ?) | A = -1 + if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op1 || B == Op1)) + return Constant::getAllOnesValue(Op1->getType()); + + // A | ~(A & ?) = -1 + if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op0 || B == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // Or distributes over And. Try some generic simplifications based on this. + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // And distributes over Or. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. 
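SimplifyAndInst and SimplifyOrInst above both rely on the absorption identities "(A | ?) & A = A" and "(A & ?) | A = A". They are easy to confirm bit by bit on small integers, as in this short check:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 64; ++A)
    for (unsigned B = 0; B < 64; ++B) {
      assert(((A | B) & A) == A);   // (A | ?) & A = A
      assert(((A & B) | A) == A);   // (A & ?) | A = A
    }
  return 0;
}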
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyXorInst - Given operands for a Xor, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast<Constant>(Op0)) { + if (Constant *CRHS = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // A ^ undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // A ^ 0 = A + if (match(Op1, m_Zero())) + return Op0; + + // A ^ A = 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // A ^ ~A = ~A ^ A = -1 + if (match(Op0, m_Not(m_Specific(Op1))) || + match(Op1, m_Not(m_Specific(Op0)))) + return Constant::getAllOnesValue(Op0->getType()); + + // Try some generic simplifications for associative operations. + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT, + MaxRecurse)) + return V; + + // And distributes over Xor. Try some generic simplifications based on this. + if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, + TD, DT, MaxRecurse)) + return V; + + // Threading Xor over selects and phi nodes is pointless, so don't bother. + // Threading over the select in "A ^ select(cond, B, C)" means evaluating + // "A^B" and "A^C" and seeing if they are equal; but they are equal if and + // only if B and C are equal. If B and C are equal then (since we assume + // that operands have already been simplified) "select(cond, B, C)" should + // have been simplified to the common value of B and C already. Analysing + // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly + // for threading over phi nodes. + + return 0; +} + +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static const Type *GetCompareTy(Value *Op) { + return CmpInst::makeCmpResultType(Op->getType()); +} + +/// ExtractEquivalentCondition - Rummage around inside V looking for something +/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, +/// otherwise return null. Helper function for analyzing max/min idioms. +static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (!SI) + return 0; + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + if (!Cmp) + return 0; + Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); + if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) + return Cmp; + if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && + LHS == CmpRHS && RHS == CmpLHS) + return Cmp; + return 0; +} + +/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can +/// fold the result. If not, this returns null. 
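The Xor identities above, "A ^ A = 0" and "A ^ ~A = -1" (all bits set), can likewise be spot-checked on unsigned values:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    assert((A ^ A) == 0u);    // A ^ A = 0
    assert((A ^ ~A) == ~0u);  // A ^ ~A has every bit set
  }
  return 0;
}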
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + const Type *ITy = GetCompareTy(LHS); // The return type. + const Type *OpTy = LHS->getType(); // The operand type. + + // icmp X, X -> true/false + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) + return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); + + // Special case logic when the operands have i1 type. + if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() && + cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) { + switch (Pred) { + default: break; + case ICmpInst::ICMP_EQ: + // X == 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_NE: + // X != 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGT: + // X >u 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_UGE: + // X >=u 1 -> X + if (match(RHS, m_One())) + return LHS; + break; + case ICmpInst::ICMP_SLT: + // X <s 0 -> X + if (match(RHS, m_Zero())) + return LHS; + break; + case ICmpInst::ICMP_SLE: + // X <=s -1 -> X + if (match(RHS, m_One())) + return LHS; + break; + } + } + + // icmp <alloca*>, <global/alloca*/null> - Different stack variables have + // different addresses, and what's more the address of a stack variable is + // never null or equal to the address of a global. Note that generalizing + // to the case where LHS is a global variable address or null is pointless, + // since if both LHS and RHS are constants then we already constant folded + // the compare, and if only one of them is then we moved it to RHS already. + if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) || + isa<ConstantPointerNull>(RHS))) + // We already know that LHS != RHS. + return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + + // If we are comparing with zero then try hard since this is a common case. + if (match(RHS, m_Zero())) { + bool LHSKnownNonNegative, LHSKnownNegative; + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + case ICmpInst::ICMP_ULT: + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); + case ICmpInst::ICMP_UGE: + // getAllOnesValue also works for vectors, unlike getTrue. 
+ return ConstantInt::getAllOnesValue(ITy); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULE: + if (isKnownNonZero(LHS, TD)) + return Constant::getNullValue(ITy); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + if (isKnownNonZero(LHS, TD)) + return ConstantInt::getAllOnesValue(ITy); + break; + case ICmpInst::ICMP_SLT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getAllOnesValue(ITy); + if (LHSKnownNonNegative) + return Constant::getNullValue(ITy); + break; + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return ConstantInt::getAllOnesValue(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + return Constant::getNullValue(ITy); + break; + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return Constant::getNullValue(ITy); + if (LHSKnownNonNegative) + return ConstantInt::getAllOnesValue(ITy); + break; + case ICmpInst::ICMP_SGT: + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + if (LHSKnownNegative) + return Constant::getNullValue(ITy); + if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + return ConstantInt::getAllOnesValue(ITy); + break; + } + } + + // See if we are doing a comparison with a constant integer. + if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Rule out tautological comparisons (eg., ult 0 or uge 0). + ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue()); + if (RHS_CR.isEmptySet()) + return ConstantInt::getFalse(CI->getContext()); + if (RHS_CR.isFullSet()) + return ConstantInt::getTrue(CI->getContext()); + + // Many binary operators with constant RHS have easy to compute constant + // range. Use them to check whether the comparison is a tautology. + uint32_t Width = CI->getBitWidth(); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); + ConstantInt *CI2; + if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) { + // 'urem x, CI2' produces [0, CI2). + Upper = CI2->getValue(); + } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) { + // 'srem x, CI2' produces (-|CI2|, |CI2|). + Upper = CI2->getValue().abs(); + Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (!CI2->isZero()) + Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + APInt Val = CI2->getValue().abs(); + if (!Val.isMinValue()) { + Lower = IntMin.sdiv(Val); + Upper = IntMax.sdiv(Val) + 1; + } + } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { + // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (CI2->getValue().ult(Width)) + Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { + // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. 
+ APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (CI2->getValue().ult(Width)) { + Lower = IntMin.ashr(CI2->getValue()); + Upper = IntMax.ashr(CI2->getValue()) + 1; + } + } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { + // 'or x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); + } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { + // 'and x, CI2' produces [0, CI2]. + Upper = CI2->getValue() + 1; + } + if (Lower != Upper) { + ConstantRange LHS_CR = ConstantRange(Lower, Upper); + if (RHS_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + if (RHS_CR.inverse().contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + + // Compare of cast, for example (zext X) != 0 -> X != 0 + if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { + Instruction *LI = cast<CastInst>(LHS); + Value *SrcOp = LI->getOperand(0); + const Type *SrcTy = SrcOp->getType(); + const Type *DstTy = LI->getType(); + + // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input + // if the integer type is the same size as the pointer type. + if (MaxRecurse && TD && isa<PtrToIntInst>(LI) && + TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // Transfer the cast to the constant. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, + ConstantExpr::getIntToPtr(RHSC, SrcTy), + TD, DT, MaxRecurse-1)) + return V; + } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { + if (RI->getOperand(0)->getType() == SrcTy) + // Compare without the cast. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + TD, DT, MaxRecurse-1)) + return V; + } + } + + if (isa<ZExtInst>(LHS)) { + // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the + // same type. + if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that signed predicates become unsigned. + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, RI->getOperand(0), TD, DT, + MaxRecurse-1)) + return V; + } + // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two zero-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), + SrcOp, Trunc, TD, DT, MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit + // there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + // LHS <u RHS. + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getTrue(CI->getContext()); + + // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS + // is non-negative then LHS <s RHS. 
+ case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + } + } + } + } + + if (isa<SExtInst>(LHS)) { + // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the + // same type. + if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) { + if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) + // Compare X and Y. Note that the predicate does not change. + if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), + TD, DT, MaxRecurse-1)) + return V; + } + // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended + // too. If not, then try to deduce the result of the comparison. + else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + // Compute the constant that would happen if we truncated to SrcTy then + // reextended to DstTy. + Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy); + Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy); + + // If the re-extended constant didn't change then this is effectively + // also a case of comparing two sign-extended values. + if (RExt == CI && MaxRecurse) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT, + MaxRecurse-1)) + return V; + + // Otherwise the upper bits of LHS are all equal, while RHS has varying + // bits there. Use this to work out the result of the comparison. + if (RExt != CI) { + switch (Pred) { + default: + assert(false && "Unknown ICmp predicate!"); + case ICmpInst::ICMP_EQ: + return ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_NE: + return ConstantInt::getTrue(CI->getContext()); + + // If RHS is non-negative then LHS <s RHS. If RHS is negative then + // LHS >s RHS. + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return CI->getValue().isNegative() ? + ConstantInt::getTrue(CI->getContext()) : + ConstantInt::getFalse(CI->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return CI->getValue().isNegative() ? + ConstantInt::getFalse(CI->getContext()) : + ConstantInt::getTrue(CI->getContext()); + + // If LHS is non-negative then LHS <u RHS. If LHS is negative then + // LHS >u RHS. + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // Comparison is true iff the LHS <s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, + Constant::getNullValue(SrcTy), + TD, DT, MaxRecurse-1)) + return V; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // Comparison is true iff the LHS >=s 0. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, + Constant::getNullValue(SrcTy), + TD, DT, MaxRecurse-1)) + return V; + break; + } + } + } + } + } + + // Special logic for binary operators. + BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS); + BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); + if (MaxRecurse && (LBO || RBO)) { + // Analyze the case when either LHS or RHS is an add instruction. + Value *A = 0, *B = 0, *C = 0, *D = 0; + // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). 
+ bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; + if (LBO && LBO->getOpcode() == Instruction::Add) { + A = LBO->getOperand(0); B = LBO->getOperand(1); + NoLHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap()); + } + if (RBO && RBO->getOpcode() == Instruction::Add) { + C = RBO->getOperand(0); D = RBO->getOperand(1); + NoRHSWrapProblem = ICmpInst::isEquality(Pred) || + (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) || + (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap()); + } + + // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow. + if ((A == RHS || B == RHS) && NoLHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, + Constant::getNullValue(RHS->getType()), + TD, DT, MaxRecurse-1)) + return V; + + // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. + if ((C == LHS || D == LHS) && NoRHSWrapProblem) + if (Value *V = SimplifyICmpInst(Pred, + Constant::getNullValue(LHS->getType()), + C == LHS ? D : C, TD, DT, MaxRecurse-1)) + return V; + + // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. + if (A && C && (A == C || A == D || B == C || B == D) && + NoLHSWrapProblem && NoRHSWrapProblem) { + // Determine Y and Z in the form icmp (X+Y), (X+Z). + Value *Y = (A == C || A == D) ? B : A; + Value *Z = (C == A || C == B) ? D : C; + if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1)) + return V; + } + } + + if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); + } + } + if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + // getNullValue also works for vectors, unlike getFalse. 
+      return Constant::getNullValue(ITy);
+    }
+  }
+
+  if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+      LBO->getOperand(1) == RBO->getOperand(1)) {
+    switch (LBO->getOpcode()) {
+    default: break;
+    case Instruction::UDiv:
+    case Instruction::LShr:
+      if (ICmpInst::isSigned(Pred))
+        break;
+      // fall-through
+    case Instruction::SDiv:
+    case Instruction::AShr:
+      if (!LBO->isExact() || !RBO->isExact())
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    case Instruction::Shl: {
+      bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+      bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+      if (!NUW && !NSW)
+        break;
+      if (!NSW && ICmpInst::isSigned(Pred))
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    }
+    }
+  }
+
+  // Simplify comparisons involving max/min.
+  Value *A, *B;
+  CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
+  CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
+
+  // Signed variants on "max(a,b)>=a -> true".
+  if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+    EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+    // We analyze this as smax(A, B) pred A.
+    P = Pred;
+  } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+    EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
+    // We analyze this as smax(A, B) swapped-pred A.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
+             (A == RHS || B == RHS)) {
+    if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+    EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+    // We analyze this as smax(-A, -B) swapped-pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = CmpInst::getSwappedPredicate(Pred);
+  } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
+             (A == LHS || B == LHS)) {
+    if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+    EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
+    // We analyze this as smax(-A, -B) pred -A.
+    // Note that we do not need to actually form -A or -B thanks to EqP.
+    P = Pred;
+  }
+  if (P != CmpInst::BAD_ICMP_PREDICATE) {
+    // Cases correspond to "max(A, B) p A".
+    switch (P) {
+    default:
+      break;
+    case CmpInst::ICMP_EQ:
+    case CmpInst::ICMP_SLE:
+      // Equivalent to "A EqP B". This may be the same as the condition tested
+      // in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
+        return V;
+      // Otherwise, see if "A EqP B" simplifies.
+      if (MaxRecurse)
+        if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+          return V;
+      break;
+    case CmpInst::ICMP_NE:
+    case CmpInst::ICMP_SGT: {
+      CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
+      // Equivalent to "A InvEqP B". This may be the same as the condition
+      // tested in the max/min; if so, we can just return that.
+      if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
+        return V;
+      if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
+        return V;
+      // Otherwise, see if "A InvEqP B" simplifies.
+ if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_SGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_SLT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Unsigned variants on "max(a,b)>=a -> true". + P = CmpInst::BAD_ICMP_PREDICATE; + if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umax(A, B) pred A. + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umax(A, B). + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umin(A, B) pred A. + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umin(A, B). + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_ULE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_UGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_UGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_ULT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Variants on "max(x,y) >= min(x,z)". + Value *C, *D; + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && + match(RHS, m_SMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_SGE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SLT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + match(RHS, m_SMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) 
pred max(x, ?). + if (Pred == CmpInst::ICMP_SLE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SGT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && + match(RHS, m_UMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_UGE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_ULT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + match(RHS, m_UMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_ULE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_UGT) + // Always false. + return Constant::getNullValue(ITy); + } + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} + +/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; + assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); + + if (Constant *CLHS = dyn_cast<Constant>(LHS)) { + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + + // If we have a constant, make sure it is on the RHS. + std::swap(LHS, RHS); + Pred = CmpInst::getSwappedPredicate(Pred); + } + + // Fold trivial predicates. + if (Pred == FCmpInst::FCMP_FALSE) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_TRUE) + return ConstantInt::get(GetCompareTy(LHS), 1); + + if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef + return UndefValue::get(GetCompareTy(LHS)); + + // fcmp x,x -> true/false. Not all compares are foldable. + if (LHS == RHS) { + if (CmpInst::isTrueWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 1); + if (CmpInst::isFalseWhenEqual(Pred)) + return ConstantInt::get(GetCompareTy(LHS), 0); + } + + // Handle fcmp with constant RHS + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // If the constant is a nan, see if we can fold the comparison based on it. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->getValueAPF().isNaN()) { + if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo" + return ConstantInt::getFalse(CFP->getContext()); + assert(FCmpInst::isUnordered(Pred) && + "Comparison must be either ordered or unordered!"); + // True if unordered. 
+ return ConstantInt::getTrue(CFP->getContext()); + } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } + } + } + } + + // If the comparison is with the result of a select instruction, check whether + // comparing with either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + // If the comparison is with the result of a phi instruction, check whether + // doing the compare with each incoming phi value yields a common result. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} + +/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold +/// the result. If not, this returns null. +Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, + const TargetData *TD, const DominatorTree *) { + // select true, X, Y -> X + // select false, X, Y -> Y + if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) + return CB->getZExtValue() ? TrueVal : FalseVal; + + // select C, X, X -> X + if (TrueVal == FalseVal) + return TrueVal; + + if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y + if (isa<Constant>(TrueVal)) + return TrueVal; + return FalseVal; + } + if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X + return FalseVal; + if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X + return TrueVal; + + return 0; +} + +/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, + const TargetData *TD, const DominatorTree *) { + // The type of the GEP pointer operand. + const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + + // getelementptr P -> P. + if (NumOps == 1) + return Ops[0]; + + if (isa<UndefValue>(Ops[0])) { + // Compute the (pointer) type returned by the GEP instruction. + const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1], + NumOps-1); + const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + return UndefValue::get(GEPTy); + } + + if (NumOps == 2) { + // getelementptr P, 0 -> P. + if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) + if (C->isZero()) + return Ops[0]; + // getelementptr P, N -> P if P points to a type of zero size. 
+ if (TD) { + const Type *Ty = PtrTy->getElementType(); + if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) + return Ops[0]; + } + } + + // Check to see if this is constant foldable. + for (unsigned i = 0; i != NumOps; ++i) + if (!isa<Constant>(Ops[i])) + return 0; + + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), + (Constant *const*)Ops+1, NumOps-1); +} + +/// SimplifyPHINode - See if we can fold the given phi. If not, returns null. +static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { + // If all of the PHI's incoming values are the same then replace the PHI node + // with the common value. + Value *CommonValue = 0; + bool HasUndefInput = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is the phi node itself, it can safely be skipped. + if (Incoming == PN) continue; + if (isa<UndefValue>(Incoming)) { + // Remember that we saw an undef value, but otherwise ignore them. + HasUndefInput = true; + continue; + } + if (CommonValue && Incoming != CommonValue) + return 0; // Not the same, bail out. + CommonValue = Incoming; + } + + // If CommonValue is null then all of the incoming values were either undef or + // equal to the phi node itself. + if (!CommonValue) + return UndefValue::get(PN->getType()); + + // If we have a PHI node like phi(X, undef, X), where X is defined by some + // instruction, we cannot return X as the result of the PHI node unless it + // dominates the PHI block. + if (HasUndefInput) + return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0; + + return CommonValue; +} + + +//=== Helper functions for higher up the class hierarchy. + +/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + switch (Opcode) { + case Instruction::Add: + return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::Sub: + return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Shl: + return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, + TD, DT, MaxRecurse); + case Instruction::LShr: + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + case Instruction::AShr: + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse); + default: + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) { + Constant *COps[] = {CLHS, CRHS}; + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); + } + + // If the operation is associative, try some generic simplifications. + if (Instruction::isAssociative(Opcode)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT, + MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse)) + return V; + + return 0; + } +} + +Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit); +} + +/// SimplifyCmpInst - Given operands for a CmpInst, see if we can +/// fold the result. +static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) + return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); +} + +Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const TargetData *TD, const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); +} + +/// SimplifyInstruction - See if we can compute a simplified version of this +/// instruction. If not, this returns null. 
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, + const DominatorTree *DT) { + Value *Result; + + switch (I->getOpcode()) { + default: + Result = ConstantFoldInstruction(I, TD); + break; + case Instruction::Add: + Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::Sub: + Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::Mul: + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::SDiv: + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::UDiv: + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FDiv: + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Shl: + Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->hasNoSignedWrap(), + cast<BinaryOperator>(I)->hasNoUnsignedWrap(), + TD, DT); + break; + case Instruction::LShr: + Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, DT); + break; + case Instruction::AShr: + Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), + cast<BinaryOperator>(I)->isExact(), + TD, DT); + break; + case Instruction::And: + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Or: + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Xor: + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::ICmp: + Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FCmp: + Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::Select: + Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), + I->getOperand(2), TD, DT); + break; + case Instruction::GetElementPtr: { + SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); + Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT); + break; + } + case Instruction::PHI: + Result = SimplifyPHINode(cast<PHINode>(I), DT); + break; + } + + /// If called on unreachable code, the above logic may report that the + /// instruction simplified to itself. Make life easier for users by + /// detecting that case here, returning a safe value instead. + return Result == I ? UndefValue::get(I->getType()) : Result; +} + +/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then +/// delete the From instruction. In addition to a basic RAUW, this does a +/// recursive simplification of the newly formed instructions. This catches +/// things where one simplification exposes other opportunities. This only +/// simplifies and deletes scalar operations, it does not change the CFG. 
+/// +void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, + const TargetData *TD, + const DominatorTree *DT) { + assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); + + // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that + // we can know if it gets deleted out from under us or replaced in a + // recursive simplification. + WeakVH FromHandle(From); + WeakVH ToHandle(To); + + while (!From->use_empty()) { + // Update the instruction to use the new value. + Use &TheUse = From->use_begin().getUse(); + Instruction *User = cast<Instruction>(TheUse.getUser()); + TheUse = To; + + // Check to see if the instruction can be folded due to the operand + // replacement. For example changing (or X, Y) into (or X, -1) can replace + // the 'or' with -1. + Value *SimplifiedVal; + { + // Sanity check to make sure 'User' doesn't dangle across + // SimplifyInstruction. + AssertingVH<> UserHandle(User); + + SimplifiedVal = SimplifyInstruction(User, TD, DT); + if (SimplifiedVal == 0) continue; + } + + // Recursively simplify this user to the new value. + ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT); + From = dyn_cast_or_null<Instruction>((Value*)FromHandle); + To = ToHandle; + + assert(ToHandle && "To value deleted by recursive simplification?"); + + // If the recursive simplification ended up revisiting and deleting + // 'From' then we're done. + if (From == 0) + return; + } + + // If 'From' has value handles referring to it, do a real RAUW to update them. + From->replaceAllUsesWith(To); + + From->eraseFromParent(); +} diff --git a/contrib/llvm/lib/Analysis/Interval.cpp b/contrib/llvm/lib/Analysis/Interval.cpp new file mode 100644 index 0000000..ca9cdca --- /dev/null +++ b/contrib/llvm/lib/Analysis/Interval.cpp @@ -0,0 +1,58 @@ +//===- Interval.cpp - Interval class code ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the Interval class, which represents a +// partition of a control flow graph of some kind. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Interval.h" +#include "llvm/BasicBlock.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Interval Implementation +//===----------------------------------------------------------------------===// + +// isLoop - Find out if there is a back edge in this interval... +// +bool Interval::isLoop() const { + // There is a loop in this interval iff one of the predecessors of the header + // node lives in the interval. + for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); + I != E; ++I) + if (contains(*I)) + return true; + return false; +} + + +void Interval::print(raw_ostream &OS) const { + OS << "-------------------------------------------------------------\n" + << "Interval Contents:\n"; + + // Print out all of the basic blocks in the interval... 
+ for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(), + E = Nodes.end(); I != E; ++I) + OS << **I << "\n"; + + OS << "Interval Predecessors:\n"; + for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(), + E = Predecessors.end(); I != E; ++I) + OS << **I << "\n"; + + OS << "Interval Successors:\n"; + for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(), + E = Successors.end(); I != E; ++I) + OS << **I << "\n"; +} diff --git a/contrib/llvm/lib/Analysis/IntervalPartition.cpp b/contrib/llvm/lib/Analysis/IntervalPartition.cpp new file mode 100644 index 0000000..2e259b1 --- /dev/null +++ b/contrib/llvm/lib/Analysis/IntervalPartition.cpp @@ -0,0 +1,114 @@ +//===- IntervalPartition.cpp - Interval Partition module code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of the IntervalPartition class, which +// calculates and represent the interval partition of a function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/IntervalIterator.h" +using namespace llvm; + +char IntervalPartition::ID = 0; +INITIALIZE_PASS(IntervalPartition, "intervals", + "Interval Partition Construction", true, true) + +//===----------------------------------------------------------------------===// +// IntervalPartition Implementation +//===----------------------------------------------------------------------===// + +// releaseMemory - Reset state back to before function was analyzed +void IntervalPartition::releaseMemory() { + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + delete Intervals[i]; + IntervalMap.clear(); + Intervals.clear(); + RootInterval = 0; +} + +void IntervalPartition::print(raw_ostream &O, const Module*) const { + for(unsigned i = 0, e = Intervals.size(); i != e; ++i) + Intervals[i]->print(O); +} + +// addIntervalToPartition - Add an interval to the internal list of intervals, +// and then add mappings from all of the basic blocks in the interval to the +// interval itself (in the IntervalMap). +// +void IntervalPartition::addIntervalToPartition(Interval *I) { + Intervals.push_back(I); + + // Add mappings for all of the basic blocks in I to the IntervalPartition + for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end(); + It != End; ++It) + IntervalMap.insert(std::make_pair(*It, I)); +} + +// updatePredecessors - Interval generation only sets the successor fields of +// the interval data structures. After interval generation is complete, +// run through all of the intervals and propagate successor info as +// predecessor info. +// +void IntervalPartition::updatePredecessors(Interval *Int) { + BasicBlock *Header = Int->getHeaderNode(); + for (Interval::succ_iterator I = Int->Successors.begin(), + E = Int->Successors.end(); I != E; ++I) + getBlockInterval(*I)->Predecessors.push_back(Header); +} + +// IntervalPartition ctor - Build the first level interval partition for the +// specified function... 
+// +bool IntervalPartition::runOnFunction(Function &F) { + // Pass false to intervals_begin because we take ownership of it's memory + function_interval_iterator I = intervals_begin(&F, false); + assert(I != intervals_end(&F) && "No intervals in function!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. + for (function_interval_iterator E = intervals_end(&F); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); + return false; +} + + +// IntervalPartition ctor - Build a reduced interval partition from an +// existing interval graph. This takes an additional boolean parameter to +// distinguish it from a copy constructor. Always pass in false for now. +// +IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) + : FunctionPass(ID) { + assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); + + // Pass false to intervals_begin because we take ownership of it's memory + interval_part_interval_iterator I = intervals_begin(IP, false); + assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!"); + + addIntervalToPartition(RootInterval = *I); + + ++I; // After the first one... + + // Add the rest of the intervals to the partition. + for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I) + addIntervalToPartition(*I); + + // Now that we know all of the successor information, propagate this to the + // predecessors for each block. + for (unsigned i = 0, e = Intervals.size(); i != e; ++i) + updatePredecessors(Intervals[i]); +} + diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp new file mode 100644 index 0000000..6e27597 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -0,0 +1,1128 @@ +//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for lazy computation of value constraint +// information. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lazy-value-info" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include <map> +#include <stack> +using namespace llvm; + +char LazyValueInfo::ID = 0; +INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true) + +namespace llvm { + FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); } +} + + +//===----------------------------------------------------------------------===// +// LVILatticeVal +//===----------------------------------------------------------------------===// + +/// LVILatticeVal - This is the information tracked by LazyValueInfo for each +/// value. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. +/// +namespace { +class LVILatticeVal { + enum LatticeValueTy { + /// undefined - This Value has no known value yet. + undefined, + + /// constant - This Value has a specific constant value. + constant, + /// notconstant - This Value is known to not have the specified value. + notconstant, + + /// constantrange - The Value falls within this range. + constantrange, + + /// overdefined - This value is not known to be constant, and we know that + /// it has a value. + overdefined + }; + + /// Val: This stores the current lattice value along with the Constant* for + /// the constant if this is a 'constant' or 'notconstant' value. + LatticeValueTy Tag; + Constant *Val; + ConstantRange Range; + +public: + LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + + static LVILatticeVal get(Constant *C) { + LVILatticeVal Res; + if (!isa<UndefValue>(C)) + Res.markConstant(C); + return Res; + } + static LVILatticeVal getNot(Constant *C) { + LVILatticeVal Res; + if (!isa<UndefValue>(C)) + Res.markNotConstant(C); + return Res; + } + static LVILatticeVal getRange(ConstantRange CR) { + LVILatticeVal Res; + Res.markConstantRange(CR); + return Res; + } + + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return Val; + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return Val; + } + + ConstantRange getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; + } + + /// markOverdefined - Return true if this is a change in status. + bool markOverdefined() { + if (isOverdefined()) + return false; + Tag = overdefined; + return true; + } + + /// markConstant - Return true if this is a change in status. 
+ bool markConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue())); + if (isa<UndefValue>(V)) + return false; + + assert((!isConstant() || getConstant() == V) && + "Marking constant with different value"); + assert(isUndefined()); + Tag = constant; + Val = V; + return true; + } + + /// markNotConstant - Return true if this is a change in status. + bool markNotConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue())); + if (isa<UndefValue>(V)) + return false; + + assert((!isConstant() || getConstant() != V) && + "Marking constant !constant with same value"); + assert((!isNotConstant() || getNotConstant() == V) && + "Marking !constant with different value"); + assert(isUndefined() || isConstant()); + Tag = notconstant; + Val = V; + return true; + } + + /// markConstantRange - Return true if this is a change in status. + bool markConstantRange(const ConstantRange NewR) { + if (isConstantRange()) { + if (NewR.isEmptySet()) + return markOverdefined(); + + bool changed = Range == NewR; + Range = NewR; + return changed; + } + + assert(isUndefined()); + if (NewR.isEmptySet()) + return markOverdefined(); + + Tag = constantrange; + Range = NewR; + return true; + } + + /// mergeIn - Merge the specified lattice value into this one, updating this + /// one and returning true if anything changed. + bool mergeIn(const LVILatticeVal &RHS) { + if (RHS.isUndefined() || isOverdefined()) return false; + if (RHS.isOverdefined()) return markOverdefined(); + + if (isUndefined()) { + Tag = RHS.Tag; + Val = RHS.Val; + Range = RHS.Range; + return true; + } + + if (isConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) + return false; + return markOverdefined(); + } + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) + return markOverdefined(); + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use TargetData for smarter constant folding. + if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getConstant(), + RHS.getNotConstant()))) + if (Res->isOne()) + return markNotConstant(RHS.getNotConstant()); + + return markOverdefined(); + } + + // RHS is a ConstantRange, LHS is a non-integer Constant. + + // FIXME: consider the case where RHS is a range [1, 0) and LHS is + // a function. The correct result is to pick up RHS. + + return markOverdefined(); + } + + if (isNotConstant()) { + if (RHS.isConstant()) { + if (Val == RHS.Val) + return markOverdefined(); + + // Unless we can prove that the two Constants are different, we must + // move to overdefined. + // FIXME: use TargetData for smarter constant folding. + if (ConstantInt *Res = dyn_cast<ConstantInt>( + ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, + getNotConstant(), + RHS.getConstant()))) + if (Res->isOne()) + return false; + + return markOverdefined(); + } + + if (RHS.isNotConstant()) { + if (Val == RHS.Val) + return false; + return markOverdefined(); + } + + return markOverdefined(); + } + + assert(isConstantRange() && "New LVILattice type?"); + if (!RHS.isConstantRange()) + return markOverdefined(); + + ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + return markOverdefined(); + return markConstantRange(NewR); + } +}; + +} // end anonymous namespace. 
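
// --- Illustrative sketch of the join rule implemented by LVILatticeVal::mergeIn
// above. This ToyLatticeVal type is hypothetical and self-contained: it does not
// use the LLVM API, plain integers stand in for Constant*, and the constantrange
// state is omitted so only the undefined / constant / notconstant / overdefined
// levels are modeled.
#include <cassert>
#include <cstdint>

struct ToyLatticeVal {
  enum Kind { Undefined, Const, NotConst, Overdefined };
  Kind Tag;
  int64_t Val;   // only meaningful for Const / NotConst

  static ToyLatticeVal undefined()            { return {Undefined, 0}; }
  static ToyLatticeVal constant(int64_t C)    { return {Const, C}; }
  static ToyLatticeVal notConstant(int64_t C) { return {NotConst, C}; }

  // Merge RHS into *this and report whether *this changed: undefined is the
  // identity element, overdefined absorbs everything, and conflicting facts
  // fall back to overdefined rather than ever becoming more precise.
  bool mergeIn(const ToyLatticeVal &RHS) {
    if (RHS.Tag == Undefined || Tag == Overdefined)
      return false;                                     // nothing new to learn
    if (RHS.Tag == Overdefined) { Tag = Overdefined; return true; }
    if (Tag == Undefined) { *this = RHS; return true; }

    if (Tag == RHS.Tag) {                // const vs. const, notconst vs. notconst
      if (Val == RHS.Val) return false;  // same fact, no change
      Tag = Overdefined;                 // conflicting facts
      return true;
    }

    // "== C" joined with "!= D": when C != D the union is still "!= D".
    if (Val != RHS.Val) {
      if (Tag == NotConst) return false;            // already covers the constant
      Tag = NotConst; Val = RHS.Val; return true;   // keep the inequality fact
    }
    Tag = Overdefined;                              // "== C" joined with "!= C"
    return true;
  }
};

int main() {
  ToyLatticeVal V = ToyLatticeVal::undefined();
  V.mergeIn(ToyLatticeVal::constant(7));   // first edge says "v == 7"
  V.mergeIn(ToyLatticeVal::constant(7));   // agreeing edge: no change
  assert(V.Tag == ToyLatticeVal::Const && V.Val == 7);
  V.mergeIn(ToyLatticeVal::constant(9));   // disagreeing edge
  assert(V.Tag == ToyLatticeVal::Overdefined);
  return 0;
}
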
+ +namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { + if (Val.isUndefined()) + return OS << "undefined"; + if (Val.isOverdefined()) + return OS << "overdefined"; + + if (Val.isNotConstant()) + return OS << "notconstant<" << *Val.getNotConstant() << '>'; + else if (Val.isConstantRange()) + return OS << "constantrange<" << Val.getConstantRange().getLower() << ", " + << Val.getConstantRange().getUpper() << '>'; + return OS << "constant<" << *Val.getConstant() << '>'; +} +} + +//===----------------------------------------------------------------------===// +// LazyValueInfoCache Decl +//===----------------------------------------------------------------------===// + +namespace { + /// LVIValueHandle - A callback value handle update the cache when + /// values are erased. + class LazyValueInfoCache; + struct LVIValueHandle : public CallbackVH { + LazyValueInfoCache *Parent; + + LVIValueHandle(Value *V, LazyValueInfoCache *P) + : CallbackVH(V), Parent(P) { } + + void deleted(); + void allUsesReplacedWith(Value *V) { + deleted(); + } + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<LVIValueHandle> { + typedef DenseMapInfo<Value*> PointerInfo; + static inline LVIValueHandle getEmptyKey() { + return LVIValueHandle(PointerInfo::getEmptyKey(), + static_cast<LazyValueInfoCache*>(0)); + } + static inline LVIValueHandle getTombstoneKey() { + return LVIValueHandle(PointerInfo::getTombstoneKey(), + static_cast<LazyValueInfoCache*>(0)); + } + static unsigned getHashValue(const LVIValueHandle &Val) { + return PointerInfo::getHashValue(Val); + } + static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) { + return LHS == RHS; + } + }; + + template<> + struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > { + typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy; + typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo; + typedef DenseMapInfo<Value*> BPointerInfo; + static inline PairTy getEmptyKey() { + return std::make_pair(APointerInfo::getEmptyKey(), + BPointerInfo::getEmptyKey()); + } + static inline PairTy getTombstoneKey() { + return std::make_pair(APointerInfo::getTombstoneKey(), + BPointerInfo::getTombstoneKey()); + } + static unsigned getHashValue( const PairTy &Val) { + return APointerInfo::getHashValue(Val.first) ^ + BPointerInfo::getHashValue(Val.second); + } + static bool isEqual(const PairTy &LHS, const PairTy &RHS) { + return APointerInfo::isEqual(LHS.first, RHS.first) && + BPointerInfo::isEqual(LHS.second, RHS.second); + } + }; +} + +namespace { + /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which + /// maintains information about queries across the clients' queries. + class LazyValueInfoCache { + /// ValueCacheEntryTy - This is all of the cached block information for + /// exactly one Value*. The entries are sorted by the BasicBlock* of the + /// entries, allowing us to do a lookup with a binary search. + typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; + + /// ValueCache - This is all of the cached information for all values, + /// mapped from Value* to key information. + DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache; + + /// OverDefinedCache - This tracks, on a per-block basis, the set of + /// values that are over-defined at the end of that block. This is required + /// for cache updating. 
+ typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + DenseSet<OverDefinedPairTy> OverDefinedCache; + + /// BlockValueStack - This stack holds the state of the value solver + /// during a query. It basically emulates the callstack of the naive + /// recursive value lookup process. + std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack; + + friend struct LVIValueHandle; + + /// OverDefinedCacheUpdater - A helper object that ensures that the + /// OverDefinedCache is updated whenever solveBlockValue returns. + struct OverDefinedCacheUpdater { + LazyValueInfoCache *Parent; + Value *Val; + BasicBlock *BB; + LVILatticeVal &BBLV; + + OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV, + LazyValueInfoCache *P) + : Parent(P), Val(V), BB(B), BBLV(LV) { } + + bool markResult(bool changed) { + if (changed && BBLV.isOverdefined()) + Parent->OverDefinedCache.insert(std::make_pair(BB, Val)); + return changed; + } + }; + + + + LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); + bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, + LVILatticeVal &Result); + bool hasBlockValue(Value *Val, BasicBlock *BB); + + // These methods process one work item and may add more. A false value + // returned means that the work item was not completely processed and must + // be revisited after going through the new items. + bool solveBlockValue(Value *Val, BasicBlock *BB); + bool solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB); + bool solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB); + bool solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, BasicBlock *BB); + + void solve(); + + ValueCacheEntryTy &lookup(Value *V) { + return ValueCache[LVIValueHandle(V, this)]; + } + + public: + /// getValueInBlock - This is the query interface to determine the lattice + /// value for the specified Value* at the end of the specified block. + LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB); + + /// getValueOnEdge - This is the query interface to determine the lattice + /// value for the specified Value* that is true on the specified edge. + LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB); + + /// threadEdge - This is the update interface to inform the cache that an + /// edge from PredBB to OldSucc has been threaded to be from PredBB to + /// NewSucc. + void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); + + /// eraseBlock - This is part of the update interface to inform the cache + /// that a block has been deleted. + void eraseBlock(BasicBlock *BB); + + /// clear - Empty the cache. + void clear() { + ValueCache.clear(); + OverDefinedCache.clear(); + } + }; +} // end anonymous namespace + +void LVIValueHandle::deleted() { + typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; + + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator + I = Parent->OverDefinedCache.begin(), + E = Parent->OverDefinedCache.end(); + I != E; ++I) { + if (I->second == getValPtr()) + ToErase.push_back(*I); + } + + for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + Parent->OverDefinedCache.erase(*I); + + // This erasure deallocates *this, so it MUST happen after we're done + // using any and all members of *this. 
+ Parent->ValueCache.erase(*this); +} + +void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + SmallVector<OverDefinedPairTy, 4> ToErase; + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == BB) + ToErase.push_back(*I); + } + + for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + OverDefinedCache.erase(*I); + + for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator + I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) + I->second.erase(BB); +} + +void LazyValueInfoCache::solve() { + while (!BlockValueStack.empty()) { + std::pair<BasicBlock*, Value*> &e = BlockValueStack.top(); + if (solveBlockValue(e.second, e.first)) + BlockValueStack.pop(); + } +} + +bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (isa<Constant>(Val)) + return true; + + LVIValueHandle ValHandle(Val, this); + if (!ValueCache.count(ValHandle)) return false; + return ValueCache[ValHandle].count(BB); +} + +LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) + return LVILatticeVal::get(VC); + + return lookup(Val)[BB]; +} + +bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { + if (isa<Constant>(Val)) + return true; + + ValueCacheEntryTy &Cache = lookup(Val); + LVILatticeVal &BBLV = Cache[BB]; + + // OverDefinedCacheUpdater is a helper object that will update + // the OverDefinedCache for us when this method exits. Make sure to + // call markResult on it as we exist, passing a bool to indicate if the + // cache needs updating, i.e. if we have solve a new value or not. + OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); + + // If we've already computed this block's value, return it. + if (!BBLV.isUndefined()) { + DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); + + // Since we're reusing a cached value here, we don't need to update the + // OverDefinedCahce. The cache will have been properly updated + // whenever the cached value was inserted. + ODCacheUpdater.markResult(false); + return true; + } + + // Otherwise, this is the first time we're seeing this block. Reset the + // lattice value to overdefined, so that cycles will terminate and be + // conservatively correct. + BBLV.markOverdefined(); + + Instruction *BBI = dyn_cast<Instruction>(Val); + if (BBI == 0 || BBI->getParent() != BB) { + return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); + } + + if (PHINode *PN = dyn_cast<PHINode>(BBI)) { + return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB)); + } + + if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { + BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); + return ODCacheUpdater.markResult(true); + } + + // We can only analyze the definitions of certain classes of instructions + // (integral binops and casts at the moment), so bail if this isn't one. + LVILatticeVal Result; + if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) || + !BBI->getType()->isIntegerTy()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); + } + + // FIXME: We're currently limited to binops with a constant RHS. This should + // be improved. 
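
// --- Illustrative sketch of why the transfer function below wants a constant
// RHS: with a known constant the result interval stays tight, while an unknown
// RHS collapses it, which is what forces solveBlockValue to mark such results
// overdefined. The HalfOpenRange type here is hypothetical and unrelated to
// llvm::ConstantRange; it models only non-wrapping unsigned ranges [Lo, Hi).
#include <cstdint>
#include <iostream>

struct HalfOpenRange {
  uint64_t Lo, Hi;   // represents { x : Lo <= x < Hi }
  bool Full;         // "overdefined": no useful bound

  static HalfOpenRange full()                        { return {0, 0, true}; }
  static HalfOpenRange point(uint64_t C)             { return {C, C + 1, false}; }
  static HalfOpenRange range(uint64_t L, uint64_t H) { return {L, H, false}; }

  // Interval addition; a full operand forces a full result.  Assumes the sums
  // do not wrap, which keeps the sketch short.
  HalfOpenRange add(const HalfOpenRange &R) const {
    if (Full || R.Full) return full();
    return range(Lo + R.Lo, Hi + R.Hi - 1);
  }
};

int main() {
  HalfOpenRange X = HalfOpenRange::range(10, 20);   // x in [10, 20)
  HalfOpenRange C = HalfOpenRange::point(5);        // constant RHS 5
  HalfOpenRange P = X.add(C);                       // precise: [15, 25)
  HalfOpenRange U = X.add(HalfOpenRange::full());   // unknown RHS: no bound
  std::cout << "x+5 in [" << P.Lo << ", " << P.Hi << "), "
            << "x+? full=" << U.Full << "\n";
  return 0;
}
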
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI); + if (BO && !isa<ConstantInt>(BO->getOperand(1))) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + + BBLV.markOverdefined(); + return ODCacheUpdater.markResult(true); + } + + return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB)); +} + +static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { + if (LoadInst *L = dyn_cast<LoadInst>(I)) { + return L->getPointerAddressSpace() == 0 && + GetUnderlyingObject(L->getPointerOperand()) == + GetUnderlyingObject(Ptr); + } + if (StoreInst *S = dyn_cast<StoreInst>(I)) { + return S->getPointerAddressSpace() == 0 && + GetUnderlyingObject(S->getPointerOperand()) == + GetUnderlyingObject(Ptr); + } + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (MI->isVolatile()) return false; + + // FIXME: check whether it has a valuerange that excludes zero? + ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); + if (!Len || Len->isZero()) return false; + + if (MI->getDestAddressSpace() == 0) + if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + return true; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + if (MTI->getSourceAddressSpace() == 0) + if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr) + return true; + } + return false; +} + +bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, + Value *Val, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // If this is a pointer, and there's a load from that pointer in this BB, + // then we know that the pointer can't be NULL. + bool NotNull = false; + if (Val->getType()->isPointerTy()) { + if (isa<AllocaInst>(Val)) { + NotNull = true; + } else { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){ + if (InstructionDereferencesPointer(BI, Val)) { + NotNull = true; + break; + } + } + } + } + + // If this is the entry block, we must be asking about an argument. The + // value is overdefined. + if (BB == &BB->getParent()->getEntryBlock()) { + assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } else { + Result.markOverdefined(); + } + BBLV = Result; + return true; + } + + // Loop over all of our predecessors, merging what we know from them into + // result. + bool EdgesMissing = false; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult); + if (EdgesMissing) + continue; + + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + // If we previously determined that this is a pointer that can't be null + // then return that rather than giving up entirely. + if (NotNull) { + const PointerType *PTy = cast<PointerType>(Val->getType()); + Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); + } + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'. 
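+  // (Illustrative: if every incoming edge proves %x is the constant 5, the
+  // merged lattice value is 'constant 5' rather than overdefined.)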
+ assert(!Result.isOverdefined()); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, + PHINode *PN, BasicBlock *BB) { + LVILatticeVal Result; // Start Undefined. + + // Loop over all of our predecessors, merging what we know from them into + // result. + bool EdgesMissing = false; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PhiBB = PN->getIncomingBlock(i); + Value *PhiVal = PN->getIncomingValue(i); + LVILatticeVal EdgeResult; + EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult); + if (EdgesMissing) + continue; + + Result.mergeIn(EdgeResult); + + // If we hit overdefined, exit early. The BlockVals entry is already set + // to overdefined. + if (Result.isOverdefined()) { + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because of pred.\n"); + + BBLV = Result; + return true; + } + } + if (EdgesMissing) + return false; + + // Return the merged value, which is more precise than 'overdefined'. + assert(!Result.isOverdefined() && "Possible PHI in entry block?"); + BBLV = Result; + return true; +} + +bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, + Instruction *BBI, + BasicBlock *BB) { + // Figure out the range of the LHS. If that fails, bail. + if (!hasBlockValue(BBI->getOperand(0), BB)) { + BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0))); + return false; + } + + LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); + if (!LHSVal.isConstantRange()) { + BBLV.markOverdefined(); + return true; + } + + ConstantRange LHSRange = LHSVal.getConstantRange(); + ConstantRange RHSRange(1); + const IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); + if (isa<BinaryOperator>(BBI)) { + if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) { + RHSRange = ConstantRange(RHS->getValue()); + } else { + BBLV.markOverdefined(); + return true; + } + } + + // NOTE: We're currently limited by the set of operations that ConstantRange + // can evaluate symbolically. Enhancing that set will allows us to analyze + // more definitions. + LVILatticeVal Result; + switch (BBI->getOpcode()) { + case Instruction::Add: + Result.markConstantRange(LHSRange.add(RHSRange)); + break; + case Instruction::Sub: + Result.markConstantRange(LHSRange.sub(RHSRange)); + break; + case Instruction::Mul: + Result.markConstantRange(LHSRange.multiply(RHSRange)); + break; + case Instruction::UDiv: + Result.markConstantRange(LHSRange.udiv(RHSRange)); + break; + case Instruction::Shl: + Result.markConstantRange(LHSRange.shl(RHSRange)); + break; + case Instruction::LShr: + Result.markConstantRange(LHSRange.lshr(RHSRange)); + break; + case Instruction::Trunc: + Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth())); + break; + case Instruction::SExt: + Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth())); + break; + case Instruction::ZExt: + Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth())); + break; + case Instruction::BitCast: + Result.markConstantRange(LHSRange); + break; + case Instruction::And: + Result.markConstantRange(LHSRange.binaryAnd(RHSRange)); + break; + case Instruction::Or: + Result.markConstantRange(LHSRange.binaryOr(RHSRange)); + break; + + // Unhandled instructions are overdefined. 
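+  // (Illustrative: an 'xor' or 'sdiv' by a constant still lands here even
+  // when the LHS has a known range, since those opcodes are not modeled
+  // above.)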
+ default: + DEBUG(dbgs() << " compute BB '" << BB->getName() + << "' - overdefined because inst def found.\n"); + Result.markOverdefined(); + break; + } + + BBLV = Result; + return true; +} + +/// getEdgeValue - This method attempts to infer more complex +bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, + BasicBlock *BBTo, LVILatticeVal &Result) { + // If already a constant, there is nothing to compute. + if (Constant *VC = dyn_cast<Constant>(Val)) { + Result = LVILatticeVal::get(VC); + return true; + } + + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // know that v != 0. + if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) { + // If this is a conditional branch and only one successor goes to BBTo, then + // we maybe able to infer something from the condition. + if (BI->isConditional() && + BI->getSuccessor(0) != BI->getSuccessor(1)) { + bool isTrueDest = BI->getSuccessor(0) == BBTo; + assert(BI->getSuccessor(!isTrueDest) == BBTo && + "BBTo isn't a successor of BBFrom"); + + // If V is the condition of the branch itself, then we know exactly what + // it is. + if (BI->getCondition() == Val) { + Result = LVILatticeVal::get(ConstantInt::get( + Type::getInt1Ty(Val->getContext()), isTrueDest)); + return true; + } + + // If the condition of the branch is an equality comparison, we may be + // able to infer the value. + ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()); + if (ICI && ICI->getOperand(0) == Val && + isa<Constant>(ICI->getOperand(1))) { + if (ICI->isEquality()) { + // We know that V has the RHS constant if this is a true SETEQ or + // false SETNE. + if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) + Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1))); + else + Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1))); + return true; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) { + // Calculate the range of values that would satisfy the comparison. + ConstantRange CmpRange(CI->getValue(), CI->getValue()+1); + ConstantRange TrueValues = + ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + + // If we're interested in the false dest, invert the condition. + if (!isTrueDest) TrueValues = TrueValues.inverse(); + + // Figure out the possible values of the query BEFORE this branch. + if (!hasBlockValue(Val, BBFrom)) { + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; + } + + LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + if (!InBlock.isConstantRange()) { + Result = LVILatticeVal::getRange(TrueValues); + return true; + } + + // Find all potential values that satisfy both the input and output + // conditions. + ConstantRange PossibleValues = + TrueValues.intersectWith(InBlock.getConstantRange()); + + Result = LVILatticeVal::getRange(PossibleValues); + return true; + } + } + } + } + + // If the edge was formed by a switch on the value, then we may know exactly + // what it is. + if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) { + if (SI->getCondition() == Val) { + // We don't know anything in the default case. + if (SI->getDefaultDest() == BBTo) { + Result.markOverdefined(); + return true; + } + + // We only know something if there is exactly one value that goes from + // BBFrom to BBTo. 
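+      // Illustrative sketch (the values and labels are hypothetical):
+      //   switch i32 %x, label %Default [ i32 5, label %BBTo
+      //                                   i32 7, label %BBTo ]
+      // has two case values reaching BBTo, so the switch tells us nothing
+      // there; with only the 'i32 5' case, %x would be exactly 5 on the edge.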
+ unsigned NumEdges = 0; + ConstantInt *EdgeVal = 0; + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + if (SI->getSuccessor(i) != BBTo) continue; + if (NumEdges++) break; + EdgeVal = SI->getCaseValue(i); + } + assert(EdgeVal && "Missing successor?"); + if (NumEdges == 1) { + Result = LVILatticeVal::get(EdgeVal); + return true; + } + } + } + + // Otherwise see if the value is known in the block. + if (hasBlockValue(Val, BBFrom)) { + Result = getBlockValue(Val, BBFrom); + return true; + } + BlockValueStack.push(std::make_pair(BBFrom, Val)); + return false; +} + +LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { + DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" + << BB->getName() << "'\n"); + + BlockValueStack.push(std::make_pair(BB, V)); + solve(); + LVILatticeVal Result = getBlockValue(V, BB); + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +LVILatticeVal LazyValueInfoCache:: +getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { + DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" + << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); + + LVILatticeVal Result; + if (!getEdgeValue(V, FromBB, ToBB, Result)) { + solve(); + bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result); + (void)WasFastQuery; + assert(WasFastQuery && "More work to do after problem solved?"); + } + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be possible + // to solve now. We do NOT try to proactively update these values. Instead, + // we clear their entries from the cache, and allow lazy updating to recompute + // them when needed. + + // The updating process is fairly simple: we need to dropped cached info + // for all values that were marked overdefined in OldSucc, and for those same + // values in any successor of OldSucc (except NewSucc) in which they were + // also marked overdefined. + std::vector<BasicBlock*> worklist; + worklist.push_back(OldSucc); + + DenseSet<Value*> ClearSet; + for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), + E = OverDefinedCache.end(); I != E; ++I) { + if (I->first == OldSucc) + ClearSet.insert(I->second); + } + + // Use a worklist to perform a depth-first search of OldSucc's successors. + // NOTE: We do not need a visited list since any blocks we have already + // visited will have had their overdefined markers cleared already, and we + // thus won't loop to their successors. + while (!worklist.empty()) { + BasicBlock *ToUpdate = worklist.back(); + worklist.pop_back(); + + // Skip blocks only accessible through NewSucc. + if (ToUpdate == NewSucc) continue; + + bool changed = false; + for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end(); + I != E; ++I) { + // If a value was marked overdefined in OldSucc, and is here too... + DenseSet<OverDefinedPairTy>::iterator OI = + OverDefinedCache.find(std::make_pair(ToUpdate, *I)); + if (OI == OverDefinedCache.end()) continue; + + // Remove it from the caches. 
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)]; + ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); + + assert(CI != Entry.end() && "Couldn't find entry to update?"); + Entry.erase(CI); + OverDefinedCache.erase(OI); + + // If we removed anything, then we potentially need to update + // blocks successors too. + changed = true; + } + + if (!changed) continue; + + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); + } +} + +//===----------------------------------------------------------------------===// +// LazyValueInfo Impl +//===----------------------------------------------------------------------===// + +/// getCache - This lazily constructs the LazyValueInfoCache. +static LazyValueInfoCache &getCache(void *&PImpl) { + if (!PImpl) + PImpl = new LazyValueInfoCache(); + return *static_cast<LazyValueInfoCache*>(PImpl); +} + +bool LazyValueInfo::runOnFunction(Function &F) { + if (PImpl) + getCache(PImpl).clear(); + + TD = getAnalysisIfAvailable<TargetData>(); + // Fully lazy. + return false; +} + +void LazyValueInfo::releaseMemory() { + // If the cache was allocated, free it. + if (PImpl) { + delete &getCache(PImpl); + PImpl = 0; + } +} + +Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { + LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB); + + if (Result.isConstant()) + return Result.getConstant(); + if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getConstantOnEdge - Determine whether the specified value is known to be a +/// constant on the specified edge. Return null if not. +Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, + BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + if (Result.isConstant()) + return Result.getConstant(); + if (Result.isConstantRange()) { + ConstantRange CR = Result.getConstantRange(); + if (const APInt *SingleVal = CR.getSingleElement()) + return ConstantInt::get(V->getContext(), *SingleVal); + } + return 0; +} + +/// getPredicateOnEdge - Determine whether the specified value comparison +/// with a constant is known to be true or false on the specified CFG edge. +/// Pred is a CmpInst predicate. +LazyValueInfo::Tristate +LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB) { + LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + + // If we know the value is a constant, evaluate the conditional. + Constant *Res = 0; + if (Result.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) + return ResCI->isZero() ? False : True; + return Unknown; + } + + if (Result.isConstantRange()) { + ConstantInt *CI = dyn_cast<ConstantInt>(C); + if (!CI) return Unknown; + + ConstantRange CR = Result.getConstantRange(); + if (Pred == ICmpInst::ICMP_EQ) { + if (!CR.contains(CI->getValue())) + return False; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return True; + } else if (Pred == ICmpInst::ICMP_NE) { + if (!CR.contains(CI->getValue())) + return True; + + if (CR.isSingleElement() && CR.contains(CI->getValue())) + return False; + } + + // Handle more complex predicates. 
+ ConstantRange TrueValues = + ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) + return True; + if (TrueValues.inverse().contains(CR)) + return False; + return Unknown; + } + + if (Result.isNotConstant()) { + // If this is an equality comparison, we can try to fold it knowing that + // "V != C1". + if (Pred == ICmpInst::ICMP_EQ) { + // !C1 == C -> false iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return False; + } else if (Pred == ICmpInst::ICMP_NE) { + // !C1 != C -> true iff C1 == C. + Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, + Result.getNotConstant(), C, TD); + if (Res->isNullValue()) + return True; + } + return Unknown; + } + + return Unknown; +} + +void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); +} + +void LazyValueInfo::eraseBlock(BasicBlock *BB) { + if (PImpl) getCache(PImpl).eraseBlock(BB); +} diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp new file mode 100644 index 0000000..efb722b --- /dev/null +++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp @@ -0,0 +1,137 @@ +//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LibCallAliasAnalysis class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallAliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +using namespace llvm; + +// Register this pass... +char LibCallAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", + "LibCall Alias Analysis", false, true, false) + +FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { + return new LibCallAliasAnalysis(LCI); +} + +LibCallAliasAnalysis::~LibCallAliasAnalysis() { + delete LCI; +} + +void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + AU.setPreservesAll(); // Does not transform code +} + + + +/// AnalyzeLibCallDetails - Given a call to a function with the specified +/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call +/// vs the specified pointer/size. +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, + ImmutableCallSite CS, + const Location &Loc) { + // If we have a function, check to see what kind of mod/ref effects it + // has. Start by including any info globally known about the function. + AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; + if (MRInfo == NoModRef) return MRInfo; + + // If that didn't tell us that the function is 'readnone', check to see + // if we have detailed info and if 'P' is any of the locations we know + // about. 
+ const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; + if (Details == 0) + return MRInfo; + + // If the details array is of the 'DoesNot' kind, we only know something if + // the pointer is a match for one of the locations in 'Details'. If we find a + // match, we can prove some interactions cannot happen. + // + if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { + // Find out if the pointer refers to a known location. + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &LocInfo = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); + if (Res != LibCallLocationInfo::Yes) continue; + + // If we find a match against a location that we 'do not' interact with, + // learn this info into MRInfo. + return ModRefResult(MRInfo & ~Details[i].MRInfo); + } + return MRInfo; + } + + // If the details are of the 'DoesOnly' sort, we know something if the pointer + // is a match for one of the locations in 'Details'. Also, if we can prove + // that the pointers is *not* one of the locations in 'Details', we know that + // the call is NoModRef. + assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); + + // Find out if the pointer refers to a known location. + bool NoneMatch = true; + for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { + const LibCallLocationInfo &LocInfo = + LCI->getLocationInfo(Details[i].LocationID); + LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); + if (Res == LibCallLocationInfo::No) continue; + + // If we don't know if this pointer points to the location, then we have to + // assume it might alias in some case. + if (Res == LibCallLocationInfo::Unknown) { + NoneMatch = false; + continue; + } + + // If we know that this pointer definitely is pointing into the location, + // merge in this information. + return ModRefResult(MRInfo & Details[i].MRInfo); + } + + // If we found that the pointer is guaranteed to not match any of the + // locations in our 'DoesOnly' rule, then we know that the pointer must point + // to some other location. Since the libcall doesn't mod/ref any other + // locations, return NoModRef. + if (NoneMatch) + return NoModRef; + + // Otherwise, return any other info gained so far. + return MRInfo; +} + +// getModRefInfo - Check to see if the specified callsite can clobber the +// specified memory object. +// +AliasAnalysis::ModRefResult +LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + ModRefResult MRInfo = ModRef; + + // If this is a direct call to a function that LCI knows about, get the + // information about the runtime function. + if (LCI) { + if (const Function *F = CS.getCalledFunction()) { + if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { + MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); + if (MRInfo == NoModRef) return NoModRef; + } + } + } + + // The AliasAnalysis base class has some smarts, lets use them. + return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); +} diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp new file mode 100644 index 0000000..81b0f46 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp @@ -0,0 +1,63 @@ +//===- LibCallSemantics.cpp - Describe library semantics ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements interfaces that can be used to describe language +// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM +// optimizers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Function.h" +using namespace llvm; + +/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This +/// helper does the cast. +static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) { + return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr); +} + +LibCallInfo::~LibCallInfo() { + delete getMap(Impl); +} + +const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { + // Get location info on the first call. + if (NumLocations == 0) + NumLocations = getLocationInfo(Locations); + + assert(LocID < NumLocations && "Invalid location ID!"); + return Locations[LocID]; +} + + +/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to +/// the specified function if we have it. If not, return null. +const LibCallFunctionInfo * +LibCallInfo::getFunctionInfo(const Function *F) const { + StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); + + /// If this is the first time we are querying for this info, lazily construct + /// the StringMap to index it. + if (Map == 0) { + Impl = Map = new StringMap<const LibCallFunctionInfo*>(); + + const LibCallFunctionInfo *Array = getFunctionInfoArray(); + if (Array == 0) return 0; + + // We now have the array of entries. Populate the StringMap. + for (unsigned i = 0; Array[i].Name; ++i) + (*Map)[Array[i].Name] = Array+i; + } + + // Look up this function in the string map. + return Map->lookup(F->getName()); +} + diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp new file mode 100644 index 0000000..89755da --- /dev/null +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -0,0 +1,655 @@ +//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass statically checks for common and easily-identified constructs +// which produce undefined or likely unintended behavior in LLVM IR. +// +// It is not a guarantee of correctness, in two ways. First, it isn't +// comprehensive. There are checks which could be done statically which are +// not yet implemented. Some of these are indicated by TODO comments, but +// those aren't comprehensive either. Second, many conditions cannot be +// checked statically. This pass does no dynamic instrumentation, so it +// can't check for all possible problems. +// +// Another limitation is that it assumes all code will be executed. A store +// through a null pointer in a basic block which is never reached is harmless, +// but this pass will warn about it anyway. This is the main reason why most +// of these checks live here instead of in the Verifier pass. +// +// Optimization passes may make conditions that this pass checks for more or +// less obvious. 
If an optimization pass appears to be introducing a warning, +// it may be that the optimization pass is merely exposing an existing +// condition in the code. +// +// This code may be run before instcombine. In many cases, instcombine checks +// for the same kinds of things and turns instructions with undefined behavior +// into unreachable (or equivalent). Because of this, this pass makes some +// effort to look through bitcasts and so on. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +namespace { + namespace MemRef { + static unsigned Read = 1; + static unsigned Write = 2; + static unsigned Callee = 4; + static unsigned Branchee = 8; + } + + class Lint : public FunctionPass, public InstVisitor<Lint> { + friend class InstVisitor<Lint>; + + void visitFunction(Function &F); + + void visitCallSite(CallSite CS); + void visitMemoryReference(Instruction &I, Value *Ptr, + uint64_t Size, unsigned Align, + const Type *Ty, unsigned Flags); + + void visitCallInst(CallInst &I); + void visitInvokeInst(InvokeInst &I); + void visitReturnInst(ReturnInst &I); + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitXor(BinaryOperator &I); + void visitSub(BinaryOperator &I); + void visitLShr(BinaryOperator &I); + void visitAShr(BinaryOperator &I); + void visitShl(BinaryOperator &I); + void visitSDiv(BinaryOperator &I); + void visitUDiv(BinaryOperator &I); + void visitSRem(BinaryOperator &I); + void visitURem(BinaryOperator &I); + void visitAllocaInst(AllocaInst &I); + void visitVAArgInst(VAArgInst &I); + void visitIndirectBrInst(IndirectBrInst &I); + void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitUnreachableInst(UnreachableInst &I); + + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const; + + public: + Module *Mod; + AliasAnalysis *AA; + DominatorTree *DT; + TargetData *TD; + + std::string Messages; + raw_string_ostream MessagesStr; + + static char ID; // Pass identification, replacement for typeid + Lint() : FunctionPass(ID), MessagesStr(Messages) { + initializeLintPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<DominatorTree>(); + } + virtual void print(raw_ostream &O, const Module *M) const {} + + void WriteValue(const Value *V) { + if (!V) return; + if (isa<Instruction>(V)) { + MessagesStr << *V << '\n'; + } else { + WriteAsOperand(MessagesStr, V, true, Mod); + MessagesStr << '\n'; + } + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. 
This provides a nice place to put a breakpoint if you want + // to see why something is not correct. + void CheckFailed(const Twine &Message, + const Value *V1 = 0, const Value *V2 = 0, + const Value *V3 = 0, const Value *V4 = 0) { + MessagesStr << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + } + }; +} + +char Lint::ID = 0; +INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", + false, true) + +// Assert - We know that cond should be true, if not print an error message. +#define Assert(C, M) \ + do { if (!(C)) { CheckFailed(M); return; } } while (0) +#define Assert1(C, M, V1) \ + do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) +#define Assert2(C, M, V1, V2) \ + do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) +#define Assert3(C, M, V1, V2, V3) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) +#define Assert4(C, M, V1, V2, V3, V4) \ + do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) + +// Lint::run - This is the main Analysis entry point for a +// function. +// +bool Lint::runOnFunction(Function &F) { + Mod = F.getParent(); + AA = &getAnalysis<AliasAnalysis>(); + DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + visit(F); + dbgs() << MessagesStr.str(); + Messages.clear(); + return false; +} + +void Lint::visitFunction(Function &F) { + // This isn't undefined behavior, it's just a little unusual, and it's a + // fairly common mistake to neglect to name a function. + Assert1(F.hasName() || F.hasLocalLinkage(), + "Unusual: Unnamed function with non-local linkage", &F); + + // TODO: Check for irreducible control flow. +} + +void Lint::visitCallSite(CallSite CS) { + Instruction &I = *CS.getInstruction(); + Value *Callee = CS.getCalledValue(); + + visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, + 0, 0, MemRef::Callee); + + if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { + Assert1(CS.getCallingConv() == F->getCallingConv(), + "Undefined behavior: Caller and callee calling convention differ", + &I); + + const FunctionType *FT = F->getFunctionType(); + unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + + Assert1(FT->isVarArg() ? + FT->getNumParams() <= NumActualArgs : + FT->getNumParams() == NumActualArgs, + "Undefined behavior: Call argument count mismatches callee " + "argument count", &I); + + Assert1(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", &I); + + // Check argument types (in case the callee was casted) and attributes. + // TODO: Verify that caller and callee attributes are compatible. + Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + for (; AI != AE; ++AI) { + Value *Actual = *AI; + if (PI != PE) { + Argument *Formal = PI++; + Assert1(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", &I); + + // Check that noalias arguments don't alias other arguments. This is + // not fully precise because we don't know the sizes of the dereferenced + // memory regions. 
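+        // Illustrative case (the function and value names are hypothetical):
+        // given 'declare void @f(i8* noalias %dst, i8* %src)', a call such as
+        // 'call void @f(i8* %buf, i8* %buf)' is flagged, because the two
+        // pointer arguments trivially must-alias.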
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) + if (AI != BI && (*BI)->getType()->isPointerTy()) { + AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); + Assert1(Result != AliasAnalysis::MustAlias && + Result != AliasAnalysis::PartialAlias, + "Unusual: noalias argument aliases another argument", &I); + } + + // Check that an sret argument points to valid memory. + if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { + const Type *Ty = + cast<PointerType>(Formal->getType())->getElementType(); + visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), + TD ? TD->getABITypeAlignment(Ty) : 0, + Ty, MemRef::Read | MemRef::Write); + } + } + } + } + + if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) + for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + AI != AE; ++AI) { + Value *Obj = findValue(*AI, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Undefined behavior: Call with \"tail\" keyword references " + "alloca", &I); + } + + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) + switch (II->getIntrinsicID()) { + default: break; + + // TODO: Check more intrinsics + + case Intrinsic::memcpy: { + MemCpyInst *MCI = cast<MemCpyInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, + MCI->getAlignment(), 0, + MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. + uint64_t Size = 0; + if (const ConstantInt *Len = + dyn_cast<ConstantInt>(findValue(MCI->getLength(), + /*OffsetOk=*/false))) + if (Len->getValue().isIntN(32)) + Size = Len->getValue().getZExtValue(); + Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != + AliasAnalysis::MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } + case Intrinsic::memmove: { + MemMoveInst *MMI = cast<MemMoveInst>(&I); + // TODO: If the size is known, use it. + visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, + MemRef::Write); + visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, + MMI->getAlignment(), 0, + MemRef::Read); + break; + } + case Intrinsic::memset: { + MemSetInst *MSI = cast<MemSetInst>(&I); + // TODO: If the size is known, use it. 
+ visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, + MSI->getAlignment(), 0, + MemRef::Write); + break; + } + + case Intrinsic::vastart: + Assert1(I.getParent()->getParent()->isVarArg(), + "Undefined behavior: va_start called in a non-varargs function", + &I); + + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + case Intrinsic::vacopy: + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read); + break; + case Intrinsic::vaend: + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + + case Intrinsic::stackrestore: + // Stackrestore doesn't read or write memory, but it sets the + // stack pointer, which the compiler may read from or write to + // at any time, so check it for both readability and writeability. + visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, + 0, 0, MemRef::Read | MemRef::Write); + break; + } +} + +void Lint::visitCallInst(CallInst &I) { + return visitCallSite(&I); +} + +void Lint::visitInvokeInst(InvokeInst &I) { + return visitCallSite(&I); +} + +void Lint::visitReturnInst(ReturnInst &I) { + Function *F = I.getParent()->getParent(); + Assert1(!F->doesNotReturn(), + "Unusual: Return statement in function with noreturn attribute", + &I); + + if (Value *V = I.getReturnValue()) { + Value *Obj = findValue(V, /*OffsetOk=*/true); + Assert1(!isa<AllocaInst>(Obj), + "Unusual: Returning alloca value", &I); + } +} + +// TODO: Check that the reference is in bounds. +// TODO: Check readnone/readonly function attributes. +void Lint::visitMemoryReference(Instruction &I, + Value *Ptr, uint64_t Size, unsigned Align, + const Type *Ty, unsigned Flags) { + // If no memory is being referenced, it doesn't matter if the pointer + // is valid. 
+ if (Size == 0) + return; + + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); + Assert1(!isa<ConstantPointerNull>(UnderlyingObject), + "Undefined behavior: Null pointer dereference", &I); + Assert1(!isa<UndefValue>(UnderlyingObject), + "Undefined behavior: Undef pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert1(!isa<ConstantInt>(UnderlyingObject) || + !cast<ConstantInt>(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); + + if (Flags & MemRef::Write) { + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) + Assert1(!GV->isConstant(), + "Undefined behavior: Write to read-only memory", &I); + Assert1(!isa<Function>(UnderlyingObject) && + !isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Write to text section", &I); + } + if (Flags & MemRef::Read) { + Assert1(!isa<Function>(UnderlyingObject), + "Unusual: Load from function body", &I); + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Load from block address", &I); + } + if (Flags & MemRef::Callee) { + Assert1(!isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Call to block address", &I); + } + if (Flags & MemRef::Branchee) { + Assert1(!isa<Constant>(UnderlyingObject) || + isa<BlockAddress>(UnderlyingObject), + "Undefined behavior: Branch to non-blockaddress", &I); + } + + if (TD) { + if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty); + + if (Align != 0) { + unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), + "Undefined behavior: Memory reference address is misaligned", &I); + } + } +} + +void Lint::visitLoadInst(LoadInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getType()), I.getAlignment(), + I.getType(), MemRef::Read); +} + +void Lint::visitStoreInst(StoreInst &I) { + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getOperand(0)->getType()), + I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); +} + +void Lint::visitXor(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: xor(undef, undef)", &I); +} + +void Lint::visitSub(BinaryOperator &I) { + Assert1(!isa<UndefValue>(I.getOperand(0)) || + !isa<UndefValue>(I.getOperand(1)), + "Undefined result: sub(undef, undef)", &I); +} + +void Lint::visitLShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitAShr(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +void Lint::visitShl(BinaryOperator &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), + "Undefined result: Shift count out of range", &I); +} + +static bool isZero(Value *V, 
TargetData *TD) { + // Assume undef could be zero. + if (isa<UndefValue>(V)) return true; + + unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth), + KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + return KnownZero.isAllOnesValue(); +} + +void Lint::visitSDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitUDiv(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitSRem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitURem(BinaryOperator &I) { + Assert1(!isZero(I.getOperand(1), TD), + "Undefined behavior: Division by zero", &I); +} + +void Lint::visitAllocaInst(AllocaInst &I) { + if (isa<ConstantInt>(I.getArraySize())) + // This isn't undefined behavior, it's just an obvious pessimization. + Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), + "Pessimization: Static alloca outside of entry block", &I); + + // TODO: Check for an unusual size (MSB set?) +} + +void Lint::visitVAArgInst(VAArgInst &I) { + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, + MemRef::Read | MemRef::Write); +} + +void Lint::visitIndirectBrInst(IndirectBrInst &I) { + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, + MemRef::Branchee); + + Assert1(I.getNumDestinations() != 0, + "Undefined behavior: indirectbr with no destinations", &I); +} + +void Lint::visitExtractElementInst(ExtractElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), + "Undefined result: extractelement index out of range", &I); +} + +void Lint::visitInsertElementInst(InsertElementInst &I) { + if (ConstantInt *CI = + dyn_cast<ConstantInt>(findValue(I.getOperand(2), + /*OffsetOk=*/false))) + Assert1(CI->getValue().ult(I.getType()->getNumElements()), + "Undefined result: insertelement index out of range", &I); +} + +void Lint::visitUnreachableInst(UnreachableInst &I) { + // This isn't undefined behavior, it's merely suspicious. + Assert1(&I == I.getParent()->begin() || + prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), + "Unusual: unreachable immediately preceded by instruction without " + "side effects", &I); +} + +/// findValue - Look through bitcasts and simple memory reference patterns +/// to identify an equivalent, but more informative, value. If OffsetOk +/// is true, look through getelementptrs with non-zero offsets too. +/// +/// Most analysis passes don't require this logic, because instcombine +/// will simplify most of these kinds of things away. But it's a goal of +/// this Lint pass to be useful even on non-optimized IR. +Value *Lint::findValue(Value *V, bool OffsetOk) const { + SmallPtrSet<Value *, 4> Visited; + return findValueImpl(V, OffsetOk, Visited); +} + +/// findValueImpl - Implementation helper for findValue. +Value *Lint::findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet<Value *, 4> &Visited) const { + // Detect self-referential values. + if (!Visited.insert(V)) + return UndefValue::get(V->getType()); + + // TODO: Look through sext or zext cast, when the result is known to + // be interpreted as signed or unsigned, respectively. 
+ // TODO: Look through eliminable cast pairs. + // TODO: Look through calls with unique return values. + // TODO: Look through vector insert/extract/shuffle. + V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts(); + if (LoadInst *L = dyn_cast<LoadInst>(V)) { + BasicBlock::iterator BBI = L; + BasicBlock *BB = L->getParent(); + SmallPtrSet<BasicBlock *, 4> VisitedBlocks; + for (;;) { + if (!VisitedBlocks.insert(BB)) break; + if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), + BB, BBI, 6, AA)) + return findValueImpl(U, OffsetOk, Visited); + if (BBI != BB->begin()) break; + BB = BB->getUniquePredecessor(); + if (!BB) break; + BBI = BB->end(); + } + } else if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (Value *W = PN->hasConstantValue()) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { + if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), + Ex->getIndices())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + // Same as above, but for ConstantExpr instead of Instruction. + if (Instruction::isCast(CE->getOpcode())) { + if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), + CE->getOperand(0)->getType(), + CE->getType(), + TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + } else if (CE->getOpcode() == Instruction::ExtractValue) { + ArrayRef<unsigned> Indices = CE->getIndices(); + if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + } + + // As a last resort, try SimplifyInstruction or constant folding. + if (Instruction *Inst = dyn_cast<Instruction>(V)) { + if (Value *W = SimplifyInstruction(Inst, TD, DT)) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + + return V; +} + +//===----------------------------------------------------------------------===// +// Implement the public interfaces to this file... +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createLintPass() { + return new Lint(); +} + +/// lintFunction - Check a function for errors, printing messages on stderr. +/// +void llvm::lintFunction(const Function &f) { + Function &F = const_cast<Function&>(f); + assert(!F.isDeclaration() && "Cannot lint external functions"); + + FunctionPassManager FPM(F.getParent()); + Lint *V = new Lint(); + FPM.add(V); + FPM.run(F); +} + +/// lintModule - Check a module for errors, printing messages on stderr. 
+/// +void llvm::lintModule(const Module &M) { + PassManager PM; + Lint *V = new Lint(); + PM.add(V); + PM.run(const_cast<Module&>(M)); +} diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp new file mode 100644 index 0000000..c5c676b --- /dev/null +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -0,0 +1,236 @@ +//===- Loads.cpp - Local load analysis ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines simple local analyses for load instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +using namespace llvm; + +/// AreEquivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool AreEquivalentAddressValues(const Value *A, const Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come from identical arithmetic instructions. + // Use isIdenticalToWhenDefined instead of isIdenticalTo because + // this function is only used when one address use dominates the + // other, which means that they'll always either have the same + // value or one of them will have an undefined value. + if (isa<BinaryOperator>(A) || isa<CastInst>(A) || + isa<PHINode>(A) || isa<GetElementPtrInst>(A)) + if (const Instruction *BI = dyn_cast<Instruction>(B)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and +/// bitcasts to get back to the underlying object being addressed, keeping +/// track of the offset in bytes from the GEPs relative to the result. +/// This is closely related to GetUnderlyingObject but is located +/// here to avoid making VMCore depend on TargetData. +static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, + uint64_t &ByteOffset, + unsigned MaxLookup = 6) { + if (!V->getType()->isPointerTy()) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->hasAllConstantIndices()) + return V; + SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); + ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + return V; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } + return V; +} + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. 
If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, + unsigned Align, const TargetData *TD) { + uint64_t ByteOffset = 0; + Value *Base = V; + if (TD) + Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + + const Type *BaseType = 0; + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { + // An alloca is safe to load from as load as it is suitably aligned. + BaseType = AI->getAllocatedType(); + BaseAlign = AI->getAlignment(); + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { + // Global variables are safe to load from but their size cannot be + // guaranteed if they are overridden. + if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { + BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } + } + + if (BaseType && BaseType->isSized()) { + if (TD && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); + + if (Align <= BaseAlign) { + if (!TD) + return true; // Loading directly from an alloca or global is OK. + + // Check if the load is within the bounds of the underlying object. + const PointerType *AddrTy = cast<PointerType>(V->getType()); + uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) + return true; + } + } + + // Otherwise, be a little bit aggressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call which may write to memory (i.e. which might do + // a free) the pointer could be marked invalid. + if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && + !isa<DbgInfoIntrinsic>(BBI)) + return false; + + if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { + if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true; + } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true; + } + } + return false; +} + +/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the +/// instruction before ScanFrom) checking to see if we have the value at the +/// memory address *Ptr locally available within a small number of instructions. +/// If the value is available, return it. +/// +/// If not, return the iterator for the last validated instruction that the +/// value would be live through. If we scanned the entire block and didn't find +/// something that invalidates *Ptr or provides it, ScanFrom would be left at +/// begin() and this returns null. ScanFrom could also be left +/// +/// MaxInstsToScan specifies the maximum instructions to scan in the block. If +/// it is set to 0, it will scan the whole block. You can also optionally +/// specify an alias analysis implementation, which makes this more precise. 
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, + AliasAnalysis *AA) { + if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + + // If we're using alias analysis to disambiguate get the size of *Ptr. + uint64_t AccessSize = 0; + if (AA) { + const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); + AccessSize = AA->getTypeStoreSize(AccessTy); + } + + while (ScanFrom != ScanBB->begin()) { + // We must ignore debug info directives when counting (otherwise they + // would affect codegen). + Instruction *Inst = --ScanFrom; + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + // Restore ScanFrom to expected value in case next test succeeds + ScanFrom++; + + // Don't scan huge blocks. + if (MaxInstsToScan-- == 0) return 0; + + --ScanFrom; + // If this is a load of Ptr, the loaded value is available. + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + return LI; + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // If this is a store through Ptr, the value is available! + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + return SI->getOperand(0); + + // If Ptr is an alloca and this is a store to a different alloca, ignore + // the store. This is a trivial form of alias analysis that is important + // for reg2mem'd code. + if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) && + (isa<AllocaInst>(SI->getOperand(1)) || + isa<GlobalVariable>(SI->getOperand(1)))) + continue; + + // If we have alias analysis and it says the store won't modify the loaded + // value, ignore the store. + if (AA && + (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // Otherwise the store that may or may not alias the pointer, bail out. + ++ScanFrom; + return 0; + } + + // If this is some other instruction that may clobber Ptr, bail out. + if (Inst->mayWriteToMemory()) { + // If alias analysis claims that it really won't modify the load, + // ignore it. + if (AA && + (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // May modify the pointer, bail out. + ++ScanFrom; + return 0; + } + } + + // Got to the start of the block, we didn't find it, but are done for this + // block. + return 0; +} diff --git a/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp new file mode 100644 index 0000000..c1afe8f --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopDependenceAnalysis.cpp @@ -0,0 +1,358 @@ +//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the (beginning) of an implementation of a loop dependence analysis +// framework, which is used to detect dependences in memory accesses in loops. +// +// Please note that this is work in progress and the interface is subject to +// change. +// +// TODO: adapt as implementation progresses. 
+// +// TODO: document lingo (pair, subscript, index) +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lda" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/LoopDependenceAnalysis.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Instructions.h" +#include "llvm/Operator.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +STATISTIC(NumAnswered, "Number of dependence queries answered"); +STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); +STATISTIC(NumDependent, "Number of pairs with dependent accesses"); +STATISTIC(NumIndependent, "Number of pairs with independent accesses"); +STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); + +LoopPass *llvm::createLoopDependenceAnalysisPass() { + return new LoopDependenceAnalysis(); +} + +INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda", + "Loop Dependence Analysis", false, true) +char LoopDependenceAnalysis::ID = 0; + +//===----------------------------------------------------------------------===// +// Utility Functions +//===----------------------------------------------------------------------===// + +static inline bool IsMemRefInstr(const Value *V) { + const Instruction *I = dyn_cast<const Instruction>(V); + return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); +} + +static void GetMemRefInstrs(const Loop *L, + SmallVectorImpl<Instruction*> &Memrefs) { + for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); + b != be; ++b) + for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); + i != ie; ++i) + if (IsMemRefInstr(i)) + Memrefs.push_back(i); +} + +static bool IsLoadOrStoreInst(Value *I) { + return isa<LoadInst>(I) || isa<StoreInst>(I); +} + +static Value *GetPointerOperand(Value *I) { + if (LoadInst *i = dyn_cast<LoadInst>(I)) + return i->getPointerOperand(); + if (StoreInst *i = dyn_cast<StoreInst>(I)) + return i->getPointerOperand(); + llvm_unreachable("Value is no load or store instruction!"); + // Never reached. 
+ return 0; +} + +static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *aObj = GetUnderlyingObject(A); + const Value *bObj = GetUnderlyingObject(B); + return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), + bObj, AA->getTypeStoreSize(bObj->getType())); +} + +static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { + return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); +} + +//===----------------------------------------------------------------------===// +// Dependence Testing +//===----------------------------------------------------------------------===// + +bool LoopDependenceAnalysis::isDependencePair(const Value *A, + const Value *B) const { + return IsMemRefInstr(A) && + IsMemRefInstr(B) && + (cast<const Instruction>(A)->mayWriteToMemory() || + cast<const Instruction>(B)->mayWriteToMemory()); +} + +bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, + Value *B, + DependencePair *&P) { + void *insertPos = 0; + FoldingSetNodeID id; + id.AddPointer(A); + id.AddPointer(B); + + P = Pairs.FindNodeOrInsertPos(id, insertPos); + if (P) return true; + + P = new (PairAllocator) DependencePair(id, A, B); + Pairs.InsertNode(P, insertPos); + return false; +} + +void LoopDependenceAnalysis::getLoops(const SCEV *S, + DenseSet<const Loop*>* Loops) const { + // Refactor this into an SCEVVisitor, if efficiency becomes a concern. + for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) + if (!SE->isLoopInvariant(S, L)) + Loops->insert(L); +} + +bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { + DenseSet<const Loop*> loops; + getLoops(S, &loops); + return loops.empty(); +} + +bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { + const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S); + return isLoopInvariant(S) || (rec && rec->isAffine()); +} + +bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { + return isLoopInvariant(A) && isLoopInvariant(B); +} + +bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { + DenseSet<const Loop*> loops; + getLoops(A, &loops); + getLoops(B, &loops); + return loops.size() == 1; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseZIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); + return A == B ? Dependent : Independent; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseMIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSubscript(const SCEV *A, + const SCEV *B, + Subscript *S) const { + DEBUG(dbgs() << " Testing subscript: " << *A << ", " << *B << "\n"); + + if (A == B) { + DEBUG(dbgs() << " -> [D] same SCEV\n"); + return Dependent; + } + + if (!isAffine(A) || !isAffine(B)) { + DEBUG(dbgs() << " -> [?] 
not affine\n"); + return Unknown; + } + + if (isZIVPair(A, B)) + return analyseZIV(A, B, S); + + if (isSIVPair(A, B)) + return analyseSIV(A, B, S); + + return analyseMIV(A, B, S); +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analysePair(DependencePair *P) const { + DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n"); + + // We only analyse loads and stores but no possible memory accesses by e.g. + // free, call, or invoke instructions. + if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) { + DEBUG(dbgs() << "--> [?] no load/store\n"); + return Unknown; + } + + Value *aPtr = GetPointerOperand(P->A); + Value *bPtr = GetPointerOperand(P->B); + + switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) { + case AliasAnalysis::MayAlias: + case AliasAnalysis::PartialAlias: + // We can not analyse objects if we do not know about their aliasing. + DEBUG(dbgs() << "---> [?] may alias\n"); + return Unknown; + + case AliasAnalysis::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + DEBUG(dbgs() << "---> [I] no alias\n"); + return Independent; + + case AliasAnalysis::MustAlias: + break; // The underlying objects alias, test accesses for dependence. + } + + const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr); + const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr); + + if (!aGEP || !bGEP) + return Unknown; + + // FIXME: Is filtering coupled subscripts necessary? + + // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding + // trailing zeroes to the smaller GEP, if needed. + typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy; + GEPOpdPairsTy opds; + for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(), + aEnd = aGEP->idx_end(), + bIdx = bGEP->idx_begin(), + bEnd = bGEP->idx_end(); + aIdx != aEnd && bIdx != bEnd; + aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) { + const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE); + const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE); + opds.push_back(std::make_pair(aSCEV, bSCEV)); + } + + if (!opds.empty() && opds[0].first != opds[0].second) { + // We cannot (yet) handle arbitrary GEP pointer offsets. By limiting + // + // TODO: this could be relaxed by adding the size of the underlying object + // to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i) and we + // know that x is a [100 x i8]*, we could modify the first subscript to be + // (i, 200-i) instead of (i, -i). + return Unknown; + } + + // Now analyse the collected operand pairs (skipping the GEP ptr offsets). + for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end(); + i != end; ++i) { + Subscript subscript; + DependenceResult result = analyseSubscript(i->first, i->second, &subscript); + if (result != Dependent) { + // We either proved independence or failed to analyse this subscript. + // Further subscripts will not improve the situation, so abort early. + return result; + } + P->Subscripts.push_back(subscript); + } + // We successfully analysed all subscripts but failed to prove independence. + return Dependent; +} + +bool LoopDependenceAnalysis::depends(Value *A, Value *B) { + assert(isDependencePair(A, B) && "Values form no dependence pair!"); + ++NumAnswered; + + DependencePair *p; + if (!findOrInsertDependencePair(A, B, p)) { + // The pair is not cached, so analyse it. 
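The depends() query above, together with the PrintLoopInfo printer further down, suggests the client-side pattern: collect the memory references of a loop and test them pairwise. The fragment below is a sketch of that pattern from a hypothetical pass that added AU.addRequired<LoopDependenceAnalysis>(); includes and the rest of the pass are assumed.

// Inside a hypothetical LoopPass::runOnLoop(Loop *L, LPPassManager &).
LoopDependenceAnalysis *LDA = &getAnalysis<LoopDependenceAnalysis>();

SmallVector<Instruction*, 8> MemRefs;
for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
     BB != BE; ++BB)
  for (BasicBlock::iterator I = (*BB)->begin(), IE = (*BB)->end();
       I != IE; ++I)
    if (I->mayReadFromMemory() || I->mayWriteToMemory())
      MemRefs.push_back(I);

for (unsigned i = 0, e = MemRefs.size(); i != e; ++i)
  for (unsigned j = i + 1; j != e; ++j)
    if (LDA->isDependencePair(MemRefs[i], MemRefs[j]) &&
        !LDA->depends(MemRefs[i], MemRefs[j])) {
      // Proven independent: a client transformation may reorder the two
      // accesses (nothing is done here in this sketch).
    }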
+ ++NumAnalysed; + switch (p->Result = analysePair(p)) { + case Dependent: ++NumDependent; break; + case Independent: ++NumIndependent; break; + case Unknown: ++NumUnknown; break; + } + } + return p->Result != Independent; +} + +//===----------------------------------------------------------------------===// +// LoopDependenceAnalysis Implementation +//===----------------------------------------------------------------------===// + +bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { + this->L = L; + AA = &getAnalysis<AliasAnalysis>(); + SE = &getAnalysis<ScalarEvolution>(); + return false; +} + +void LoopDependenceAnalysis::releaseMemory() { + Pairs.clear(); + PairAllocator.Reset(); +} + +void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<ScalarEvolution>(); +} + +static void PrintLoopInfo(raw_ostream &OS, + LoopDependenceAnalysis *LDA, const Loop *L) { + if (!L->empty()) return; // ignore non-innermost loops + + SmallVector<Instruction*, 8> memrefs; + GetMemRefInstrs(L, memrefs); + + OS << "Loop at depth " << L->getLoopDepth() << ", header block: "; + WriteAsOperand(OS, L->getHeader(), false); + OS << "\n"; + + OS << " Load/store instructions: " << memrefs.size() << "\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; + + OS << " Pairwise dependence results:\n"; + for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(), + end = memrefs.end(); x != end; ++x) + for (SmallVector<Instruction*, 8>::const_iterator y = x + 1; + y != end; ++y) + if (LDA->isDependencePair(*x, *y)) + OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) + << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent") + << "\n"; +} + +void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { + // TODO: doc why const_cast is safe + PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L); +} diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp new file mode 100644 index 0000000..0583140 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -0,0 +1,419 @@ +//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoopInfo class that is used to identify natural loops +// and determine the loop depth of various nodes of the CFG. Note that the +// loops identified may actually be several natural loops that share the same +// header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <algorithm> +using namespace llvm; + +// Always verify loopinfo if expensive checking is enabled. 
+#ifdef XDEBUG +static bool VerifyLoopInfo = true; +#else +static bool VerifyLoopInfo = false; +#endif +static cl::opt<bool,true> +VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), + cl::desc("Verify loop info (time consuming)")); + +char LoopInfo::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) + +//===----------------------------------------------------------------------===// +// Loop implementation +// + +/// isLoopInvariant - Return true if the specified value is loop invariant +/// +bool Loop::isLoopInvariant(Value *V) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return !contains(I); + return true; // All non-instructions are loop invariant +} + +/// hasLoopInvariantOperands - Return true if all the operands of the +/// specified instruction are loop invariant. +bool Loop::hasLoopInvariantOperands(Instruction *I) const { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!isLoopInvariant(I->getOperand(i))) + return false; + + return true; +} + +/// makeLoopInvariant - If the given value is an instruciton inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the value after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt) const { + if (Instruction *I = dyn_cast<Instruction>(V)) + return makeLoopInvariant(I, Changed, InsertPt); + return true; // All non-instructions are loop-invariant. +} + +/// makeLoopInvariant - If the given instruction is inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the instruction after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt) const { + // Test if the value is already loop-invariant. + if (isLoopInvariant(I)) + return true; + if (!I->isSafeToSpeculativelyExecute()) + return false; + if (I->mayReadFromMemory()) + return false; + // Determine the insertion point, unless one was given. + if (!InsertPt) { + BasicBlock *Preheader = getLoopPreheader(); + // Without a preheader, hoisting is not feasible. + if (!Preheader) + return false; + InsertPt = Preheader->getTerminator(); + } + // Don't hoist instructions with loop-variant operands. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) + return false; + + // Hoist. + I->moveBefore(InsertPt); + Changed = true; + return true; +} + +/// getCanonicalInductionVariable - Check to see if the loop has a canonical +/// induction variable: an integer recurrence that starts at 0 and increments +/// by one each time through the loop. If so, return the phi node that +/// corresponds to it. +/// +/// The IndVarSimplify pass transforms loops to have a canonical induction +/// variable. 
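A short usage sketch for makeLoopInvariant above, assuming some transformation holds a Loop *L and an Instruction *I from its body; the surrounding code is not shown.

bool Changed = false;
if (L->makeLoopInvariant(I, Changed, /*InsertPt=*/0)) {
  // I is now known to be loop-invariant. With a null InsertPt it was
  // hoisted, together with any hoistable operands, to the terminator of
  // the loop preheader, if hoisting was needed at all.
  if (Changed) {
    // IR was modified; update or invalidate any cached information.
  }
}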
+/// +PHINode *Loop::getCanonicalInductionVariable() const { + BasicBlock *H = getHeader(); + + BasicBlock *Incoming = 0, *Backedge = 0; + pred_iterator PI = pred_begin(H); + assert(PI != pred_end(H) && + "Loop must have at least one backedge!"); + Backedge = *PI++; + if (PI == pred_end(H)) return 0; // dead loop + Incoming = *PI++; + if (PI != pred_end(H)) return 0; // multiple backedges? + + if (contains(Incoming)) { + if (contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!contains(Backedge)) + return 0; + + // Loop over all of the PHI nodes, looking for a canonical indvar. + for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + if (ConstantInt *CI = + dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) + if (CI->isNullValue()) + if (Instruction *Inc = + dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && + Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) + if (CI->equalsInt(1)) + return PN; + } + return 0; +} + +/// getTripCount - Return a loop-invariant LLVM value indicating the number of +/// times the loop will be executed. Note that this means that the backedge +/// of the loop executes N-1 times. If the trip-count cannot be determined, +/// this returns null. +/// +/// The IndVarSimplify pass transforms loops to have a form that this +/// function easily understands. +/// +Value *Loop::getTripCount() const { + // Canonical loops will end with a 'cmp ne I, V', where I is the incremented + // canonical induction variable and V is the trip count of the loop. + PHINode *IV = getCanonicalInductionVariable(); + if (IV == 0 || IV->getNumIncomingValues() != 2) return 0; + + bool P0InLoop = contains(IV->getIncomingBlock(0)); + Value *Inc = IV->getIncomingValue(!P0InLoop); + BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop); + + if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) + if (BI->isConditional()) { + if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { + if (ICI->getOperand(0) == Inc) { + if (BI->getSuccessor(0) == getHeader()) { + if (ICI->getPredicate() == ICmpInst::ICMP_NE) + return ICI->getOperand(1); + } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + return ICI->getOperand(1); + } + } + } + } + + return 0; +} + +/// getSmallConstantTripCount - Returns the trip count of this loop as a +/// normal unsigned value, if possible. Returns 0 if the trip count is unknown +/// or not constant. Will also return 0 if the trip count is very large +/// (>= 2^32) +unsigned Loop::getSmallConstantTripCount() const { + Value* TripCount = this->getTripCount(); + if (TripCount) { + if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) { + // Guard against huge trip counts. + if (TripCountC->getValue().getActiveBits() <= 32) { + return (unsigned)TripCountC->getZExtValue(); + } + } + } + return 0; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). 
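The induction-variable and trip-count queries above are commonly combined when deciding how to transform a loop. A hedged sketch follows; the unrolling threshold is made up for illustration.

// Hypothetical unrolling decision for a loop L, in simplified form.
if (unsigned TC = L->getSmallConstantTripCount()) {
  // Constant trip count that fits in 32 bits, e.g. 4 for
  //   for (i = 0; i != 4; ++i) ...
  bool FullyUnroll = TC <= 8;
  (void)FullyUnroll;
} else if (Value *TCV = L->getTripCount()) {
  // Loop-invariant but non-constant trip count: only runtime unrolling
  // or versioning is possible.
  (void)TCV;
} else if (PHINode *IV = L->getCanonicalInductionVariable()) {
  // A canonical 0, 1, 2, ... induction variable exists, but no usable
  // trip count was recognized.
  (void)IV;
}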
+/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant, use getSmallConstantTripCount for that case), Will also return 1 +/// if the trip count is very large (>= 2^32). +unsigned Loop::getSmallConstantTripMultiple() const { + Value* TripCount = this->getTripCount(); + // This will hold the ConstantInt result, if any + ConstantInt *Result = NULL; + if (TripCount) { + // See if the trip count is constant itself + Result = dyn_cast<ConstantInt>(TripCount); + // if not, see if it is a multiplication + if (!Result) + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) { + switch (BO->getOpcode()) { + case BinaryOperator::Mul: + Result = dyn_cast<ConstantInt>(BO->getOperand(1)); + break; + case BinaryOperator::Shl: + if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (CI->getValue().getActiveBits() <= 5) + return 1u << CI->getZExtValue(); + break; + default: + break; + } + } + } + // Guard against huge trip counts. + if (Result && Result->getValue().getActiveBits() <= 32) { + return (unsigned)Result->getZExtValue(); + } else { + return 1; + } +} + +/// isLCSSAForm - Return true if the Loop is in LCSSA form +bool Loop::isLCSSAForm(DominatorTree &DT) const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end()); + + for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { + BasicBlock *BB = *BI; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + User *U = *UI; + BasicBlock *UserBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) + UserBB = P->getIncomingBlock(UI); + + // Check the current block, as a fast-path, before checking whether + // the use is anywhere in the loop. Most values are used in the same + // block they are defined in. Also, blocks not reachable from the + // entry are special; uses in them don't need to go through PHIs. + if (UserBB != BB && + !LoopBBs.count(UserBB) && + DT.isReachableFromEntry(UserBB)) + return false; + } + } + + return true; +} + +/// isLoopSimplifyForm - Return true if the Loop is in the form that +/// the LoopSimplify form transforms loops to, which is sometimes called +/// normal form. +bool Loop::isLoopSimplifyForm() const { + // Normal-form loops have a preheader, a single backedge, and all of their + // exits have all their predecessors inside the loop. + return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); +} + +/// hasDedicatedExits - Return true if no exit block for the loop +/// has a predecessor that is outside the loop. +bool Loop::hasDedicatedExits() const { + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end()); + // Each predecessor of each exit block of a normal loop is contained + // within the loop. + SmallVector<BasicBlock *, 4> ExitBlocks; + getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + for (pred_iterator PI = pred_begin(ExitBlocks[i]), + PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) + if (!LoopBBs.count(*PI)) + return false; + // All the requirements are met. + return true; +} + +/// getUniqueExitBlocks - Return all unique successor blocks of this loop. 
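Two concrete instances of the divisor computed above, restated as standalone arithmetic; the enum and helper below are illustrative only and not part of LLVM.

#include <cassert>

// Mirrors the Mul/Shl cases above: given the opcode feeding the trip count
// and its constant operand, return the guaranteed trip-count multiple.
enum Opcode { Mul, Shl, Other };
static unsigned tripMultiple(Opcode Op, unsigned ConstOperand) {
  switch (Op) {
  case Mul: return ConstOperand;        // "mul %n, C"  -> multiple of C
  case Shl: return 1u << ConstOperand;  // "shl %n, K"  -> multiple of 2^K
  default:  return 1;                   // unknown      -> no guarantee
  }
}

int main() {
  assert(tripMultiple(Mul, 12) == 12);
  assert(tripMultiple(Shl, 3) == 8);
  assert(tripMultiple(Other, 0) == 1);
  return 0;
}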
+/// These are the blocks _outside of the current loop_ which are branched to. +/// This assumes that loop exits are in canonical form. +/// +void +Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { + assert(hasDedicatedExits() && + "getUniqueExitBlocks assumes the loop has canonical form exits!"); + + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + SmallVector<BasicBlock *, 32> switchExitBlocks; + + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { + + BasicBlock *current = *BI; + switchExitBlocks.clear(); + + for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) { + // If block is inside the loop then it is not a exit block. + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + continue; + + pred_iterator PI = pred_begin(*I); + BasicBlock *firstPred = *PI; + + // If current basic block is this exit block's first predecessor + // then only insert exit block in to the output ExitBlocks vector. + // This ensures that same exit block is not inserted twice into + // ExitBlocks vector. + if (current != firstPred) + continue; + + // If a terminator has more then two successors, for example SwitchInst, + // then it is possible that there are multiple edges from current block + // to one exit block. + if (std::distance(succ_begin(current), succ_end(current)) <= 2) { + ExitBlocks.push_back(*I); + continue; + } + + // In case of multiple edges from current block to exit block, collect + // only one edge in ExitBlocks. Use switchExitBlocks to keep track of + // duplicate edges. + if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I) + == switchExitBlocks.end()) { + switchExitBlocks.push_back(*I); + ExitBlocks.push_back(*I); + } + } + } +} + +/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one +/// block, return that block. Otherwise return null. +BasicBlock *Loop::getUniqueExitBlock() const { + SmallVector<BasicBlock *, 8> UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + if (UniqueExitBlocks.size() == 1) + return UniqueExitBlocks[0]; + return 0; +} + +void Loop::dump() const { + print(dbgs()); +} + +//===----------------------------------------------------------------------===// +// LoopInfo implementation +// +bool LoopInfo::runOnFunction(Function &) { + releaseMemory(); + LI.Calculate(getAnalysis<DominatorTree>().getBase()); // Update + return false; +} + +void LoopInfo::verifyAnalysis() const { + // LoopInfo is a FunctionPass, but verifying every loop in the function + // each time verifyAnalysis is called is very expensive. The + // -verify-loop-info option can enable this. In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop + // manually during loop pass sequences. + + if (!VerifyLoopInfo) return; + + for (iterator I = begin(), E = end(); I != E; ++I) { + assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + (*I)->verifyLoopNest(); + } + + // TODO: check BBMap consistency. 
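A sketch of how the exit-block helpers earlier in this file are typically queried, assuming a transformation that wants to sink code out of a loop L; the rest of the pass is omitted.

if (L->hasDedicatedExits()) {
  SmallVector<BasicBlock*, 8> Exits;
  L->getUniqueExitBlocks(Exits);

  if (BasicBlock *OnlyExit = L->getUniqueExitBlock()) {
    // Exactly one exit block: sinking an instruction there is simple.
    (void)OnlyExit;
  } else {
    // Several distinct exits: the instruction would have to be duplicated
    // into every block collected in Exits.
  }
}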
+} + +void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<DominatorTree>(); +} + +void LoopInfo::print(raw_ostream &OS, const Module*) const { + LI.print(OS); +} + diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp new file mode 100644 index 0000000..10e3f29 --- /dev/null +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -0,0 +1,422 @@ +//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LoopPass and LPPassManager. All loop optimization +// and transformation passes are derived from LoopPass. LPPassManager is +// responsible for managing LoopPasses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopPass.h" +#include "llvm/DebugInfoProbe.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Timer.h" +using namespace llvm; + +namespace { + +/// PrintLoopPass - Print a Function corresponding to a Loop. +/// +class PrintLoopPass : public LoopPass { +private: + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + +public: + static char ID; + PrintLoopPass(const std::string &B, raw_ostream &o) + : LoopPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnLoop(Loop *L, LPPassManager &) { + Out << Banner; + for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); + b != be; + ++b) { + (*b)->print(Out); + } + return false; + } +}; + +char PrintLoopPass::ID = 0; +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbe + +static DebugInfoProbeInfo *TheDebugProbe; +static void createDebugInfoProbe() { + if (TheDebugProbe) return; + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, + // before static globals, thus it will be destroyed before them. + static ManagedStatic<DebugInfoProbeInfo> DIP; + TheDebugProbe = &*DIP; +} + +//===----------------------------------------------------------------------===// +// LPPassManager +// + +char LPPassManager::ID = 0; + +LPPassManager::LPPassManager(int Depth) + : FunctionPass(ID), PMDataManager(Depth) { + skipThisLoop = false; + redoThisLoop = false; + LI = NULL; + CurrentLoop = NULL; +} + +/// Delete loop from the loop queue and loop hierarchy (LoopInfo). +void LPPassManager::deleteLoopFromQueue(Loop *L) { + + if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop. + // Reparent all of the blocks in this loop. Since BBLoop had a parent, + // they are now all in it. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops. + LI->changeLoopFor(*I, ParentLoop); + + // Remove the loop from its parent loop. + for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();; + ++I) { + assert(I != E && "Couldn't find loop"); + if (*I == L) { + ParentLoop->removeChildLoop(I); + break; + } + } + + // Move all subloops into the parent loop. 
+ while (!L->empty()) + ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1)); + } else { + // Reparent all of the blocks in this loop. Since BBLoop had no parent, + // they no longer in a loop at all. + + for (unsigned i = 0; i != L->getBlocks().size(); ++i) { + // Don't change blocks in subloops. + if (LI->getLoopFor(L->getBlocks()[i]) == L) { + LI->removeBlock(L->getBlocks()[i]); + --i; + } + } + + // Remove the loop from the top-level LoopInfo object. + for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) { + assert(I != E && "Couldn't find loop"); + if (*I == L) { + LI->removeLoop(I); + break; + } + } + + // Move all of the subloops to the top-level. + while (!L->empty()) + LI->addTopLevelLoop(L->removeChildLoop(L->end()-1)); + } + + delete L; + + // If L is current loop then skip rest of the passes and let + // runOnFunction remove L from LQ. Otherwise, remove L from LQ now + // and continue applying other passes on CurrentLoop. + if (CurrentLoop == L) { + skipThisLoop = true; + return; + } + + for (std::deque<Loop *>::iterator I = LQ.begin(), + E = LQ.end(); I != E; ++I) { + if (*I == L) { + LQ.erase(I); + break; + } + } +} + +// Inset loop into loop nest (LoopInfo) and loop queue (LQ). +void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { + + assert (CurrentLoop != L && "Cannot insert CurrentLoop"); + + // Insert into loop nest + if (ParentLoop) + ParentLoop->addChildLoop(L); + else + LI->addTopLevelLoop(L); + + insertLoopIntoQueue(L); +} + +void LPPassManager::insertLoopIntoQueue(Loop *L) { + // Insert L into loop queue + if (L == CurrentLoop) + redoLoop(L); + else if (!L->getParentLoop()) + // This is top level loop. + LQ.push_front(L); + else { + // Insert L after the parent loop. + for (std::deque<Loop *>::iterator I = LQ.begin(), + E = LQ.end(); I != E; ++I) { + if (*I == L->getParentLoop()) { + // deque does not support insert after. + ++I; + LQ.insert(I, 1, L); + break; + } + } + } +} + +// Reoptimize this loop. LPPassManager will re-insert this loop into the +// queue. This allows LoopPass to change loop nest for the loop. This +// utility may send LPPassManager into infinite loops so use caution. +void LPPassManager::redoLoop(Loop *L) { + assert (CurrentLoop == L && "Can redo only CurrentLoop"); + redoThisLoop = true; +} + +/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for +/// all loop passes. +void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, + BasicBlock *To, Loop *L) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *LP = getContainedPass(Index); + LP->cloneBasicBlockAnalysis(From, To, L); + } +} + +/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes. +void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { + if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) { + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; + ++BI) { + Instruction &I = *BI; + deleteSimpleAnalysisValue(&I, L); + } + } + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *LP = getContainedPass(Index); + LP->deleteAnalysisValue(V, L); + } +} + + +// Recurse through all subloops and all loops into LQ. +static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { + LQ.push_back(L); + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + addLoopIntoQueue(*I, LQ); +} + +/// Pass Manager itself does not invalidate any analysis info. 
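A loop transformation that restructures the loop nest is expected to keep the queue consistent through the hooks above. The sketch below is hypothetical; loopIsProvablyDead and splitOutSibling are made-up helpers standing in for real transformation logic.

bool MyLoopTransform::runOnLoop(Loop *L, LPPassManager &LPM) {
  if (loopIsProvablyDead(L)) {     // hypothetical helper, not part of LLVM
    // Deleting a loop must go through the manager so that both LoopInfo
    // and the pass queue stay in sync.
    LPM.deleteLoopFromQueue(L);
    return true;
  }
  if (Loop *NewSibling = splitOutSibling(L)) { // hypothetical helper
    // Newly created loops are queued so that the remaining loop passes
    // also run on them.
    LPM.insertLoopIntoQueue(NewSibling);
    return true;
  }
  return false;
}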
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { + // LPPassManager needs LoopInfo. In the long term LoopInfo class will + // become part of LPPassManager. + Info.addRequired<LoopInfo>(); + Info.setPreservesAll(); +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the function, and if so, return true. +bool LPPassManager::runOnFunction(Function &F) { + LI = &getAnalysis<LoopInfo>(); + bool Changed = false; + createDebugInfoProbe(); + + // Collect inherited analysis from Module level pass manager. + populateInheritedAnalysis(TPM->activeStack); + + // Populate Loop Queue + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + addLoopIntoQueue(*I, LQ); + + if (LQ.empty()) // No loops, skip calling finalizers + return false; + + // Initialization + for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); + I != E; ++I) { + Loop *L = *I; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + Changed |= P->doInitialization(L, *this); + } + } + + // Walk Loops + while (!LQ.empty()) { + + CurrentLoop = LQ.back(); + skipThisLoop = false; + redoThisLoop = false; + + // Run all passes on the current Loop. + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, + CurrentLoop->getHeader()->getName()); + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + if (TheDebugProbe) + TheDebugProbe->initialize(P, F); + { + PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); + TimeRegion PassTimer(getPassTimer(P)); + + Changed |= P->runOnLoop(CurrentLoop, *this); + } + if (TheDebugProbe) + TheDebugProbe->finalize(P, F); + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getName()); + dumpPreservedSet(P); + + if (!skipThisLoop) { + // Manually check that this loop is still healthy. This is done + // instead of relying on LoopInfo::verifyLoop since LoopInfo + // is a function pass and it's really expensive to verify every + // loop in the function every time. That level of checking can be + // enabled with the -verify-loop-info option. + { + TimeRegion PassTimer(getPassTimer(LI)); + CurrentLoop->verifyLoop(); + } + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + } + + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, + skipThisLoop ? "<deleted>" : + CurrentLoop->getHeader()->getName(), + ON_LOOP_MSG); + + if (skipThisLoop) + // Do not run other passes on this loop. + break; + } + + // If the loop was deleted, release all the loop passes. This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisLoop) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "<deleted>", ON_LOOP_MSG); + } + + // Pop the loop from queue after running all passes. 
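The manager loop above calls doInitialization for every queued loop, then runOnLoop once per loop popped from LQ, and finally doFinalization. Below is a minimal illustrative pass exercising those hooks; includes, registration macros and the pass itself are made up and omitted where not essential.

namespace {
struct CountLoops : public LoopPass {   // illustrative, not an LLVM pass
  static char ID;
  unsigned NumLoops;
  CountLoops() : LoopPass(ID), NumLoops(0) {}

  virtual bool doInitialization(Loop *, LPPassManager &) {
    return false;                       // nothing to set up per loop
  }
  virtual bool runOnLoop(Loop *, LPPassManager &) {
    ++NumLoops;                         // one call per loop popped from LQ
    return false;                       // the IR is not modified
  }
  virtual bool doFinalization() {
    errs() << "visited " << NumLoops << " loops\n";
    return false;
  }
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesAll();
  }
};
char CountLoops::ID = 0;
}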
+ LQ.pop_back(); + + if (redoThisLoop) + LQ.push_back(CurrentLoop); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *P = getContainedPass(Index); + Changed |= P->doFinalization(); + } + + return Changed; +} + +/// Print passes managed by this manager +void LPPassManager::dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Loop Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + + +//===----------------------------------------------------------------------===// +// LoopPass + +Pass *LoopPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintLoopPass(Banner, O); +} + +// Check if this pass is suitable for the current LPPassManager, if +// available. This pass P is not suitable for a LPPassManager if P +// is not preserving higher level analysis info used by other +// LPPassManager passes. In such case, pop LPPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. +void LoopPass::preparePassManager(PMStack &PMS) { + + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new LPPassManager. + if (PMS.top()->getPassManagerType() == PMT_LoopPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void LoopPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find LPPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_LoopPassManager) + PMS.pop(); + + LPPassManager *LPPM; + if (PMS.top()->getPassManagerType() == PMT_LoopPassManager) + LPPM = (LPPassManager*)PMS.top(); + else { + // Create new Loop Pass Manager if it does not exist. + assert (!PMS.empty() && "Unable to create Loop Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + LPPM = new LPPassManager(PMD->getDepth() + 1); + LPPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(LPPM); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = LPPM->getAsPass(); + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(LPPM); + } + + LPPM->add(this); +} diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp new file mode 100644 index 0000000..2283db0 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -0,0 +1,176 @@ +//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" +using namespace llvm; + +namespace { + struct MemDepPrinter : public FunctionPass { + const Function *F; + + typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag; + typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep; + typedef SmallSetVector<Dep, 4> DepSet; + typedef DenseMap<const Instruction *, DepSet> DepSetMap; + DepSetMap Deps; + + static char ID; // Pass identifcation, replacement for typeid + MemDepPrinter() : FunctionPass(ID) { + initializeMemDepPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + void print(raw_ostream &OS, const Module * = 0) const; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<MemoryDependenceAnalysis>(); + AU.setPreservesAll(); + } + + virtual void releaseMemory() { + Deps.clear(); + F = 0; + } + }; +} + +char MemDepPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps", + "Print MemDeps of function", false, true) + +FunctionPass *llvm::createMemDepPrinter() { + return new MemDepPrinter(); +} + +bool MemDepPrinter::runOnFunction(Function &F) { + this->F = &F; + AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>(); + + // All this code uses non-const interfaces because MemDep is not + // const-friendly, though nothing is actually modified. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &*I; + + if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) + continue; + + MemDepResult Res = MDA.getDependency(Inst); + if (!Res.isNonLocal()) { + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Local dep should be unknown, def or clobber!"); + Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + static_cast<BasicBlock *>(0))); + } else if (CallSite CS = cast<Value>(Inst)) { + const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = + MDA.getNonLocalCallDependency(CS); + + DepSet &InstDeps = Deps[Inst]; + for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Resolved non-local call dep should be unknown, def or " + "clobber!"); + InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + I->getBB())); + } + } else { + SmallVector<NonLocalDepResult, 4> NLDI; + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // FIXME: Volatile is not handled properly here. 
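The local query used above, reduced to its essential shape for a hypothetical client pass: Inst is assumed to be an Instruction* that reads or writes memory, and the pass is assumed to have required MemoryDependenceAnalysis.

MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
MemDepResult Res = MDA.getDependency(Inst);

if (Res.isDef()) {
  // Res.getInst() exactly defines what Inst reads, e.g. a store to the
  // same location as a queried load: a candidate for forwarding.
} else if (Res.isClobber()) {
  // Res.getInst() may modify the queried location; no forwarding.
} else if (Res.isNonLocal()) {
  // The dependence lies outside Inst's block; use the non-local queries.
} else {
  // Unknown: the analysis gave up, e.g. because the scan limit was hit.
}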
+ AliasAnalysis::Location Loc = AA.getLocation(LI); + MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(), + LI->getParent(), NLDI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // FIXME: Volatile is not handled properly here. + AliasAnalysis::Location Loc = AA.getLocation(SI); + MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); + } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { + AliasAnalysis::Location Loc = AA.getLocation(VI); + MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI); + } else { + llvm_unreachable("Unknown memory instruction!"); + } + + DepSet &InstDeps = Deps[Inst]; + for (SmallVectorImpl<NonLocalDepResult>::const_iterator + I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { + const MemDepResult &Res = I->getResult(); + assert(Res.isClobber() != Res.isDef() && + "Resolved non-local pointer dep should be def or clobber!"); + InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), + Res.isClobber()), + I->getBB())); + } + } + } + + return false; +} + +void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { + for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) { + const Instruction *Inst = &*I; + + DepSetMap::const_iterator DI = Deps.find(Inst); + if (DI == Deps.end()) + continue; + + const DepSet &InstDeps = DI->second; + + for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); + I != E; ++I) { + const Instruction *DepInst = I->first.getPointer(); + bool isClobber = I->first.getInt(); + const BasicBlock *DepBB = I->second; + + OS << " "; + if (!DepInst) + OS << "Unknown"; + else if (isClobber) + OS << "Clobber"; + else + OS << " Def"; + if (DepBB) { + OS << " in block "; + WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); + } + if (DepInst) { + OS << " from: "; + if (DepInst == Inst) + OS << "<unspecified>"; + else + DepInst->print(OS); + } + OS << "\n"; + } + + Inst->print(OS); + OS << "\n\n"; + } +} diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp new file mode 100644 index 0000000..53d4304 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -0,0 +1,213 @@ +//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions identifies calls to builtin functions that allocate +// or free memory. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// malloc Call Utility Functions. +// + +/// isMalloc - Returns true if the value is either a malloc call or a +/// bitcast of the result of a malloc call. 
+bool llvm::isMalloc(const Value *I) { + return extractMallocCall(I) || extractMallocCallFromBitCast(I); +} + +static bool isMallocCall(const CallInst *CI) { + if (!CI) + return false; + + Function *Callee = CI->getCalledFunction(); + if (Callee == 0 || !Callee->isDeclaration()) + return false; + if (Callee->getName() != "malloc" && + Callee->getName() != "_Znwj" && // operator new(unsigned int) + Callee->getName() != "_Znwm" && // operator new(unsigned long) + Callee->getName() != "_Znaj" && // operator new[](unsigned int) + Callee->getName() != "_Znam") // operator new[](unsigned long) + return false; + + // Check malloc prototype. + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // attribute will exist. + const FunctionType *FTy = Callee->getFunctionType(); + if (FTy->getNumParams() != 1) + return false; + return FTy->getParamType(0)->isIntegerTy(32) || + FTy->getParamType(0)->isIntegerTy(64); +} + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst *llvm::extractMallocCall(const Value *I) { + const CallInst *CI = dyn_cast<CallInst>(I); + return (isMallocCall(CI)) ? CI : NULL; +} + +CallInst *llvm::extractMallocCall(Value *I) { + CallInst *CI = dyn_cast<CallInst>(I); + return (isMallocCall(CI)) ? CI : NULL; +} + +static bool isBitCastOfMallocCall(const BitCastInst *BCI) { + if (!BCI) + return false; + + return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0))); +} + +/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the +/// instruction is a bitcast of the result of a malloc call. +CallInst *llvm::extractMallocCallFromBitCast(Value *I) { + BitCastInst *BCI = dyn_cast<BitCastInst>(I); + return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) + : NULL; +} + +const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) { + const BitCastInst *BCI = dyn_cast<BitCastInst>(I); + return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0)) + : NULL; +} + +static Value *computeArraySize(const CallInst *CI, const TargetData *TD, + bool LookThroughSExt = false) { + if (!CI) + return NULL; + + // The size of the malloc's result type must be known to determine array size. + const Type *T = getMallocAllocatedType(CI); + if (!T || !T->isSized() || !TD) + return NULL; + + unsigned ElementSize = TD->getTypeAllocSize(T); + if (const StructType *ST = dyn_cast<StructType>(T)) + ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); + + // If malloc call's arg can be determined to be a multiple of ElementSize, + // return the multiple. Otherwise, return NULL. + Value *MallocArg = CI->getArgOperand(0); + Value *Multiple = NULL; + if (ComputeMultiple(MallocArg, ElementSize, Multiple, + LookThroughSExt)) + return Multiple; + + return NULL; +} + +/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// is a call to malloc whose array size can be determined and the array size +/// is not constant 1. Otherwise, return NULL. +const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { + const CallInst *CI = extractMallocCall(I); + Value *ArraySize = computeArraySize(CI, TD); + + if (ArraySize && + ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) + return CI; + + // CI is a non-array malloc or we can't figure out that it is an array malloc. 
+ return NULL; +} + +/// getMallocType - Returns the PointerType resulting from the malloc call. +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const PointerType *llvm::getMallocType(const CallInst *CI) { + assert(isMalloc(CI) && "getMallocType and not malloc call"); + + const PointerType *MallocType = NULL; + unsigned NumOfBitCastUses = 0; + + // Determine if CallInst has a bitcast use. + for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) { + MallocType = cast<PointerType>(BCI->getDestTy()); + NumOfBitCastUses++; + } + + // Malloc call has 1 bitcast use, so type is the bitcast's destination type. + if (NumOfBitCastUses == 1) + return MallocType; + + // Malloc call was not bitcast, so type is the malloc function's return type. + if (NumOfBitCastUses == 0) + return cast<PointerType>(CI->getType()); + + // Type could not be determined. + return NULL; +} + +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const Type *llvm::getMallocAllocatedType(const CallInst *CI) { + const PointerType *PT = getMallocType(CI); + return PT ? PT->getElementType() : NULL; +} + +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be +/// determined. +Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, + bool LookThroughSExt) { + assert(isMalloc(CI) && "getMallocArraySize and not malloc call"); + return computeArraySize(CI, TD, LookThroughSExt); +} + +//===----------------------------------------------------------------------===// +// free Call Utility Functions. +// + +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *llvm::isFreeCall(const Value *I) { + const CallInst *CI = dyn_cast<CallInst>(I); + if (!CI) + return 0; + Function *Callee = CI->getCalledFunction(); + if (Callee == 0 || !Callee->isDeclaration()) + return 0; + + if (Callee->getName() != "free" && + Callee->getName() != "_ZdlPv" && // operator delete(void*) + Callee->getName() != "_ZdaPv") // operator delete[](void*) + return 0; + + // Check free prototype. + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // attribute will exist. 
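Taken together, the recognizers above are used roughly as follows; this is a sketch rather than code from this file, where V is some Value* under inspection and TD may be null.

if (CallInst *MC = extractMallocCall(V)) {
  if (const Type *ElemTy = getMallocAllocatedType(MC)) {
    // For malloc(n * sizeof(*p)) this yields n; null if no constant
    // multiple of the element size could be proven.
    Value *ArraySize = getMallocArraySize(MC, TD, /*LookThroughSExt=*/false);
    (void)ElemTy; (void)ArraySize;
  }
} else if (const CallInst *FC = isFreeCall(V)) {
  // The single argument of a recognized free() is the pointer being freed.
  (void)FC;
}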
+ const FunctionType *FTy = Callee->getFunctionType(); + if (!FTy->getReturnType()->isVoidTy()) + return 0; + if (FTy->getNumParams() != 1) + return 0; + if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) + return 0; + + return CI; +} diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp new file mode 100644 index 0000000..bba4482 --- /dev/null +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -0,0 +1,1469 @@ +//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis that determines, for a given memory +// operation, what preceding memory operations it depends on. It builds on +// alias analysis information, and tries to provide a lazy, caching interface to +// a common kind of alias information query. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "memdep" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/PredIteratorCache.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); +STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); +STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); + +STATISTIC(NumCacheNonLocalPtr, + "Number of fully cached non-local ptr responses"); +STATISTIC(NumCacheDirtyNonLocalPtr, + "Number of cached, but dirty, non-local ptr responses"); +STATISTIC(NumUncacheNonLocalPtr, + "Number of uncached non-local ptr responses"); +STATISTIC(NumCacheCompleteNonLocalPtr, + "Number of block queries that were completely cached"); + +// Limit for the number of instructions to scan in a block. +// FIXME: Figure out what a sane value is for this. +// (500 is relatively insane.) +static const int BlockScanLimit = 500; + +char MemoryDependenceAnalysis::ID = 0; + +// Register this pass... 
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", + "Memory Dependence Analysis", false, true) + +MemoryDependenceAnalysis::MemoryDependenceAnalysis() +: FunctionPass(ID), PredCache(0) { + initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry()); +} +MemoryDependenceAnalysis::~MemoryDependenceAnalysis() { +} + +/// Clean up memory in between runs +void MemoryDependenceAnalysis::releaseMemory() { + LocalDeps.clear(); + NonLocalDeps.clear(); + NonLocalPointerDeps.clear(); + ReverseLocalDeps.clear(); + ReverseNonLocalDeps.clear(); + ReverseNonLocalPtrDeps.clear(); + PredCache->clear(); +} + + + +/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis. +/// +void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<AliasAnalysis>(); +} + +bool MemoryDependenceAnalysis::runOnFunction(Function &) { + AA = &getAnalysis<AliasAnalysis>(); + TD = getAnalysisIfAvailable<TargetData>(); + if (PredCache == 0) + PredCache.reset(new PredIteratorCache()); + return false; +} + +/// RemoveFromReverseMap - This is a helper function that removes Val from +/// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. +template <typename KeyTy> +static void RemoveFromReverseMap(DenseMap<Instruction*, + SmallPtrSet<KeyTy, 4> > &ReverseMap, + Instruction *Inst, KeyTy Val) { + typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator + InstIt = ReverseMap.find(Inst); + assert(InstIt != ReverseMap.end() && "Reverse map out of sync?"); + bool Found = InstIt->second.erase(Val); + assert(Found && "Invalid reverse map!"); (void)Found; + if (InstIt->second.empty()) + ReverseMap.erase(InstIt); +} + +/// GetLocation - If the given instruction references a specific memory +/// location, fill in Loc with the details, otherwise set Loc.Ptr to null. +/// Return a ModRefInfo value describing the general behavior of the +/// instruction. +static +AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, + AliasAnalysis::Location &Loc, + AliasAnalysis *AA) { + if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + if (LI->isVolatile()) { + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + Loc = AA->getLocation(LI); + return AliasAnalysis::Ref; + } + + if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + if (SI->isVolatile()) { + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; + } + Loc = AA->getLocation(SI); + return AliasAnalysis::Mod; + } + + if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { + Loc = AA->getLocation(V); + return AliasAnalysis::ModRef; + } + + if (const CallInst *CI = isFreeCall(Inst)) { + // calls to free() deallocate the entire structure + Loc = AliasAnalysis::Location(CI->getArgOperand(0)); + return AliasAnalysis::Mod; + } + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + Loc = AliasAnalysis::Location(II->getArgOperand(1), + cast<ConstantInt>(II->getArgOperand(0)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. 
+ return AliasAnalysis::Mod; + case Intrinsic::invariant_end: + Loc = AliasAnalysis::Location(II->getArgOperand(2), + cast<ConstantInt>(II->getArgOperand(1)) + ->getZExtValue(), + II->getMetadata(LLVMContext::MD_tbaa)); + // These intrinsics don't really modify the memory, but returning Mod + // will allow them to be handled conservatively. + return AliasAnalysis::Mod; + default: + break; + } + + // Otherwise, just do the coarse-grained thing that always works. + if (Inst->mayWriteToMemory()) + return AliasAnalysis::ModRef; + if (Inst->mayReadFromMemory()) + return AliasAnalysis::Ref; + return AliasAnalysis::NoModRef; +} + +/// getCallSiteDependencyFrom - Private helper for finding the local +/// dependencies of a call site. +MemDepResult MemoryDependenceAnalysis:: +getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, BasicBlock *BB) { + unsigned Limit = BlockScanLimit; + + // Walk backwards through the block, looking for dependencies + while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + + Instruction *Inst = --ScanIt; + + // If this inst is a memory op, get the pointer it accessed + AliasAnalysis::Location Loc; + AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + if (Loc.Ptr) { + // A simple instruction. + if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); + continue; + } + + if (CallSite InstCS = cast<Value>(Inst)) { + // Debug intrinsics don't cause dependences. + if (isa<DbgInfoIntrinsic>(Inst)) continue; + // If these two calls do not interfere, look past it. + switch (AA->getModRefInfo(CS, InstCS)) { + case AliasAnalysis::NoModRef: + // If the two calls are the same, return InstCS as a Def, so that + // CS can be found redundant and eliminated. + if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && + CS.getInstruction()->isIdenticalToWhenDefined(Inst)) + return MemDepResult::getDef(Inst); + + // Otherwise if the two calls don't interact (e.g. InstCS is readnone) + // keep scanning. + break; + default: + return MemDepResult::getClobber(Inst); + } + } + } + + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getUnknown(); +} + +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const TargetData *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. 
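+  // For illustration (hypothetical IR): if MemLoc.Ptr is
+  //   %p1 = getelementptr i8* %P, i64 1
+  // then GetPointerBaseWithConstantOffset yields MemLocBase = %P and
+  // MemLocOffs = 1, the pair that the widening check below compares against
+  // the base/offset of LI's own pointer operand.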
+ if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const TargetData &TD) { + // We can only extend non-volatile integer loads. + if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } + + return 0; +} + +/// getPointerDependencyFrom - Return the instruction on which a memory +/// location depends. If isLoad is true, this routine ignores may-aliases with +/// read-only operations. If isLoad is false, this routine ignores may-aliases +/// with reads from read-only locations. 
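+/// For example (illustrative IR): when scanning for the dependence of
+/// "%v = load i32* %P" and the walk reaches "store i32 0, i32* %P", a
+/// must-alias store is returned as getDef(store) and a may-alias store as
+/// getClobber(store); reaching the top of a non-entry block with no match
+/// yields getNonLocal(), and the top of the entry block yields getUnknown().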
+MemDepResult MemoryDependenceAnalysis:: +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB) { + + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + + unsigned Limit = BlockScanLimit; + + // Walk backwards through the basic block, looking for dependencies. + while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + + Instruction *Inst = --ScanIt; + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // Debug intrinsics don't (and can't) cause dependences. + if (isa<DbgInfoIntrinsic>(II)) continue; + + // If we reach a lifetime begin or end marker, then the query ends here + // because the value is undefined. + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) + return MemDepResult::getDef(II); + continue; + } + } + + // Values depend on loads if the pointers are must aliased. This means that + // a load depends on another must aliased load from the same value. + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + AliasAnalysis::Location LoadLoc = AA->getLocation(LI); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); + + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + +#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads + // in terms of clobbering loads, but since it does this by looking + // at the clobbering load directly, it doesn't know about any + // phi translation that may have happened along the way. + + // If we have a partial alias, then return this as a clobber for the + // client to handle. + if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); +#endif + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) + continue; + + // Stores don't alias loads from read-only memory. + if (AA->pointsToConstantMemory(LoadLoc)) + continue; + + // Stores depend on may/must aliased loads. + return MemDepResult::getDef(Inst); + } + + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // If alias analysis can tell that this store is guaranteed to not modify + // the query pointer, ignore it. 
Use getModRefInfo to handle cases where + // the query pointer points to constant memory etc. + if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) + continue; + + // Ok, this store might clobber the query pointer. Check to see if it is + // a must alias: in this case, we want to return this as a def. + AliasAnalysis::Location StoreLoc = AA->getLocation(SI); + + // If we found a pointer, check if it could be the same as our pointer. + AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); + + if (R == AliasAnalysis::NoAlias) + continue; + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + return MemDepResult::getClobber(Inst); + } + + // If this is an allocation, and if we know that the accessed pointer is to + // the allocation, return Def. This means that there is no dependence and + // the access can be optimized based on that. For example, a load could + // turn into undef. + // Note: Only determine this to be a malloc if Inst is the malloc call, not + // a subsequent bitcast of the malloc call result. There can be stores to + // the malloced memory between the malloc call and its bitcast uses, and we + // need to continue scanning until the malloc call. + if (isa<AllocaInst>(Inst) || + (isa<CallInst>(Inst) && extractMallocCall(Inst))) { + const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); + + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) + return MemDepResult::getDef(Inst); + continue; + } + + // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. + switch (AA->getModRefInfo(Inst, MemLoc)) { + case AliasAnalysis::NoModRef: + // If the call has no effect on the queried pointer, just ignore it. + continue; + case AliasAnalysis::Mod: + return MemDepResult::getClobber(Inst); + case AliasAnalysis::Ref: + // If the call is known to never store to the pointer, and if this is a + // load query, we can safely ignore it (scan past it). + if (isLoad) + continue; + default: + // Otherwise, there is a potential dependence. Return a clobber. + return MemDepResult::getClobber(Inst); + } + } + + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. + if (BB != &BB->getParent()->getEntryBlock()) + return MemDepResult::getNonLocal(); + return MemDepResult::getUnknown(); +} + +/// getDependency - Return the instruction on which a memory operation +/// depends. +MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { + Instruction *ScanPos = QueryInst; + + // Check for a cached result + MemDepResult &LocalCache = LocalDeps[QueryInst]; + + // If the cached entry is non-dirty, just return it. Note that this depends + // on MemDepResult's default constructing to 'dirty'. + if (!LocalCache.isDirty()) + return LocalCache; + + // Otherwise, if we have a dirty entry, we know we can start the scan at that + // instruction, which may save us some work. + if (Instruction *Inst = LocalCache.getInst()) { + ScanPos = Inst; + + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); + } + + BasicBlock *QueryParent = QueryInst->getParent(); + + // Do the scan. + if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. 
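+    // For illustration: a query sitting at the very top of the entry block has
+    // nothing above it anywhere, hence getUnknown(); at the top of any other
+    // block the answer may still lie in a predecessor, hence getNonLocal(),
+    // which a client can refine with one of the non-local query interfaces.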
+ if (QueryParent != &QueryParent->getParent()->getEntryBlock()) + LocalCache = MemDepResult::getNonLocal(); + else + LocalCache = MemDepResult::getUnknown(); + } else { + AliasAnalysis::Location MemLoc; + AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + if (MemLoc.Ptr) { + // If we can do a pointer scan, make it happen. + bool isLoad = !(MR & AliasAnalysis::Mod); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; + + LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, + QueryParent); + } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { + CallSite QueryCS(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + } else + // Non-memory instruction. + LocalCache = MemDepResult::getUnknown(); + } + + // Remember the result! + if (Instruction *I = LocalCache.getInst()) + ReverseLocalDeps[I].insert(QueryInst); + + return LocalCache; +} + +#ifndef NDEBUG +/// AssertSorted - This method is used when -debug is specified to verify that +/// cache arrays are properly kept sorted. +static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + int Count = -1) { + if (Count == -1) Count = Cache.size(); + if (Count == 0) return; + + for (unsigned i = 1; i != unsigned(Count); ++i) + assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); +} +#endif + +/// getNonLocalCallDependency - Perform a full dependency query for the +/// specified call, returning the set of blocks that the value is +/// potentially live across. The returned set of results will include a +/// "NonLocal" result for all blocks where the value is live across. +/// +/// This method assumes the instruction returns a "NonLocal" dependency +/// within its own block. +/// +/// This returns a reference to an internal data structure that may be +/// invalidated on the next non-local query or when an instruction is +/// removed. Clients must copy this data if they want it around longer than +/// that. +const MemoryDependenceAnalysis::NonLocalDepInfo & +MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { + assert(getDependency(QueryCS.getInstruction()).isNonLocal() && + "getNonLocalCallDependency should only be used on calls with non-local deps!"); + PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()]; + NonLocalDepInfo &Cache = CacheP.first; + + /// DirtyBlocks - This is the set of blocks that need to be recomputed. In + /// the cached case, this can happen due to instructions being deleted etc. In + /// the uncached case, this starts out as the set of predecessors we care + /// about. + SmallVector<BasicBlock*, 32> DirtyBlocks; + + if (!Cache.empty()) { + // Okay, we have a cache entry. If we know it is not dirty, just return it + // with no computation. + if (!CacheP.second) { + ++NumCacheNonLocal; + return Cache; + } + + // If we already have a partially computed set of results, scan them to + // determine what is dirty, seeding our initial DirtyBlocks worklist. + for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); + I != E; ++I) + if (I->getResult().isDirty()) + DirtyBlocks.push_back(I->getBB()); + + // Sort the cache so that we can do fast binary search lookups below. 
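+    // (NonLocalDepEntry values are ordered by their BasicBlock* key, so keeping
+    // the vector sorted is what lets the std::upper_bound probes below locate a
+    // block's slot in logarithmic time.)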
+    std::sort(Cache.begin(), Cache.end());
+
+    ++NumCacheDirtyNonLocal;
+    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+    //     << Cache.size() << " cached: " << *QueryInst;
+  } else {
+    // Seed DirtyBlocks with each of the preds of QueryInst's block.
+    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+      DirtyBlocks.push_back(*PI);
+    ++NumUncacheNonLocal;
+  }
+
+  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+  SmallPtrSet<BasicBlock*, 64> Visited;
+
+  unsigned NumSortedEntries = Cache.size();
+  DEBUG(AssertSorted(Cache));
+
+  // Iterate while we still have blocks to update.
+  while (!DirtyBlocks.empty()) {
+    BasicBlock *DirtyBB = DirtyBlocks.back();
+    DirtyBlocks.pop_back();
+
+    // Already processed this block?
+    if (!Visited.insert(DirtyBB))
+      continue;
+
+    // Do a binary search to see if we already have an entry for this block in
+    // the cache set. If so, find it.
+    DEBUG(AssertSorted(Cache, NumSortedEntries));
+    NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+                       NonLocalDepEntry(DirtyBB));
+    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+      --Entry;
+
+    NonLocalDepEntry *ExistingResult = 0;
+    if (Entry != Cache.begin()+NumSortedEntries &&
+        Entry->getBB() == DirtyBB) {
+      // If we already have an entry, and if it isn't already dirty, the block
+      // is done.
+      if (!Entry->getResult().isDirty())
+        continue;
+
+      // Otherwise, remember this slot so we can update the value.
+      ExistingResult = &*Entry;
+    }
+
+    // If the dirty entry has a pointer, start scanning from it so we don't have
+    // to rescan the entire block.
+    BasicBlock::iterator ScanPos = DirtyBB->end();
+    if (ExistingResult) {
+      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+        ScanPos = Inst;
+        // We're removing QueryInst's use of Inst.
+        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+                             QueryCS.getInstruction());
+      }
+    }
+
+    // Find out if this block has a local dependency for QueryInst.
+    MemDepResult Dep;
+
+    if (ScanPos != DirtyBB->begin()) {
+      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
+    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+      // No dependence found. If this is the entry block of the function, it is
+      // unknown, otherwise it is non-local.
+      Dep = MemDepResult::getNonLocal();
+    } else {
+      Dep = MemDepResult::getUnknown();
+    }
+
+    // If we had a dirty entry for the block, update it. Otherwise, just add
+    // a new entry.
+    if (ExistingResult)
+      ExistingResult->setResult(Dep);
+    else
+      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+    // If the block has a dependency (i.e. it isn't completely transparent to
+    // the value), remember the association!
+    if (!Dep.isNonLocal()) {
+      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+      // update this when we remove instructions.
+      if (Instruction *Inst = Dep.getInst())
+        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+    } else {
+
+      // If the block *is* completely transparent to the load, we need to check
+      // the predecessors of this block. Add them to our worklist.
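+      // For illustration: if DirtyBB is a block "if.end" with predecessors
+      // "if.then" and "if.else" (hypothetical block names), both predecessors
+      // are pushed here and receive their own cache entries on later
+      // iterations of this loop.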
+ for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) + DirtyBlocks.push_back(*PI); + } + } + + return Cache; +} + +/// getNonLocalPointerDependency - Perform a full dependency query for an +/// access to the specified (non-volatile) memory location, returning the +/// set of instructions that either define or clobber the value. +/// +/// This method assumes the pointer has a "NonLocal" dependency within its +/// own block. +/// +void MemoryDependenceAnalysis:: +getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, + BasicBlock *FromBB, + SmallVectorImpl<NonLocalDepResult> &Result) { + assert(Loc.Ptr->getType()->isPointerTy() && + "Can't get pointer deps of a non-pointer!"); + Result.clear(); + + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); + + // This is the set of blocks we've inspected, and the pointer we consider in + // each block. Because of critical edges, we currently bail out if querying + // a block with multiple different pointers. This can happen during PHI + // translation. + DenseMap<BasicBlock*, Value*> Visited; + if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB, + Result, Visited, true)) + return; + Result.clear(); + Result.push_back(NonLocalDepResult(FromBB, + MemDepResult::getUnknown(), + const_cast<Value *>(Loc.Ptr))); +} + +/// GetNonLocalInfoForBlock - Compute the memdep value for BB with +/// Pointer/PointeeSize using either cached information in Cache or by doing a +/// lookup (which may use dirty cache info if available). If we do a lookup, +/// add the result to the cache. +MemDepResult MemoryDependenceAnalysis:: +GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *BB, + NonLocalDepInfo *Cache, unsigned NumSortedEntries) { + + // Do a binary search to see if we already have an entry for this block in + // the cache set. If so, find it. + NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, + NonLocalDepEntry(BB)); + if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) + --Entry; + + NonLocalDepEntry *ExistingResult = 0; + if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) + ExistingResult = &*Entry; + + // If we have a cached entry, and it is non-dirty, use it as the value for + // this dependency. + if (ExistingResult && !ExistingResult->getResult().isDirty()) { + ++NumCacheNonLocalPtr; + return ExistingResult->getResult(); + } + + // Otherwise, we have to scan for the value. If we have a dirty cache + // entry, start scanning from its position, otherwise we scan from the end + // of the block. + BasicBlock::iterator ScanPos = BB->end(); + if (ExistingResult && ExistingResult->getResult().getInst()) { + assert(ExistingResult->getResult().getInst()->getParent() == BB && + "Instruction invalidated?"); + ++NumCacheDirtyNonLocalPtr; + ScanPos = ExistingResult->getResult().getInst(); + + // Eliminating the dirty entry from 'Cache', so update the reverse info. + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + } else { + ++NumUncacheNonLocalPtr; + } + + // Scan the block for the dependency. + MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); + + // If we had a dirty entry for the block, update it. Otherwise, just add + // a new entry. + if (ExistingResult) + ExistingResult->setResult(Dep); + else + Cache->push_back(NonLocalDepEntry(BB, Dep)); + + // If the block has a dependency (i.e. 
it isn't completely transparent to + // the value), remember the reverse association because we just added it + // to Cache! + if (Dep.isNonLocal() || Dep.isUnknown()) + return Dep; + + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently + // update MemDep when we remove instructions. + Instruction *Inst = Dep.getInst(); + assert(Inst && "Didn't depend on anything?"); + ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); + ReverseNonLocalPtrDeps[Inst].insert(CacheKey); + return Dep; +} + +/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain +/// number of elements in the array that are already properly ordered. This is +/// optimized for the case when only a few entries are added. +static void +SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, + unsigned NumSortedEntries) { + switch (Cache.size() - NumSortedEntries) { + case 0: + // done, no new entries. + break; + case 2: { + // Two new entries, insert the last one into place. + NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end()-1, Val); + Cache.insert(Entry, Val); + // FALL THROUGH. + } + case 1: + // One new entry, Just insert the new value at the appropriate position. + if (Cache.size() != 1) { + NonLocalDepEntry Val = Cache.back(); + Cache.pop_back(); + MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = + std::upper_bound(Cache.begin(), Cache.end(), Val); + Cache.insert(Entry, Val); + } + break; + default: + // Added many values, do a full scale sort. + std::sort(Cache.begin(), Cache.end()); + break; + } +} + +/// getNonLocalPointerDepFromBB - Perform a dependency query based on +/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def +/// results to the results vector and keep track of which blocks are visited in +/// 'Visited'. +/// +/// This has special behavior for the first block queries (when SkipFirstBlock +/// is true). In this special case, it ignores the contents of the specified +/// block and starts returning dependence info for its predecessors. +/// +/// This function returns false on success, or true to indicate that it could +/// not compute dependence information for some reason. This should be treated +/// as a clobber dependence on the first instruction in the predecessor block. +bool MemoryDependenceAnalysis:: +getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, + const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *StartBB, + SmallVectorImpl<NonLocalDepResult> &Result, + DenseMap<BasicBlock*, Value*> &Visited, + bool SkipFirstBlock) { + + // Look up the cached info for Pointer. + ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); + + // Set up a temporary NLPI value. If the map doesn't yet have an entry for + // CacheKey, this value will be inserted as the associated value. Otherwise, + // it'll be ignored, and we'll have to check to see if the cached size and + // tbaa tag are consistent with the current query. + NonLocalPointerInfo InitialNLPI; + InitialNLPI.Size = Loc.Size; + InitialNLPI.TBAATag = Loc.TBAATag; + + // Get the NLPI for CacheKey, inserting one into the map if it doesn't + // already have one. 
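+  // For illustration of the reconciliation below (hypothetical sizes): if the
+  // map already holds results for (Pointer, isLoad) computed for a 4-byte query
+  // and the current query asks about 8 bytes, the stale entries are thrown out
+  // and the walk re-runs at 8 bytes; a query narrower than the cached size is
+  // instead restarted at the wider cached size.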
+ std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); + NonLocalPointerInfo *CacheInfo = &Pair.first->second; + + // If we already have a cache entry for this CacheKey, we may need to do some + // work to reconcile the cache entry and the current query. + if (!Pair.second) { + if (CacheInfo->Size < Loc.Size) { + // The query's Size is greater than the cached one. Throw out the + // cached data and procede with the query at the greater size. + CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->Size = Loc.Size; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } else if (CacheInfo->Size > Loc.Size) { + // This query's Size is less than the cached one. Conservatively restart + // the query using the greater size. + return getNonLocalPointerDepFromBB(Pointer, + Loc.getWithNewSize(CacheInfo->Size), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + + // If the query's TBAATag is inconsistent with the cached one, + // conservatively throw out the cached data and restart the query with + // no tag if needed. + if (CacheInfo->TBAATag != Loc.TBAATag) { + if (CacheInfo->TBAATag) { + CacheInfo->Pair = BBSkipFirstBlockPair(); + CacheInfo->TBAATag = 0; + for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), + DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); + CacheInfo->NonLocalDeps.clear(); + } + if (Loc.TBAATag) + return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(), + isLoad, StartBB, Result, Visited, + SkipFirstBlock); + } + } + + NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps; + + // If we have valid cached information for exactly the block we are + // investigating, just return it with no recomputation. + if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) { + // We have a fully cached result for this query then we can just return the + // cached results and populate the visited set. However, we have to verify + // that we don't already have conflicting results for these blocks. Check + // to ensure that if a block in the results set is in the visited set that + // it was for the same pointer query. + if (!Visited.empty()) { + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); + if (VI == Visited.end() || VI->second == Pointer.getAddr()) + continue; + + // We have a pointer mismatch in a block. Just return clobber, saying + // that something was clobbered in this result. We could also do a + // non-fully cached query, but there is little point in doing this. + return true; + } + } + + Value *Addr = Pointer.getAddr(); + for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); + I != E; ++I) { + Visited.insert(std::make_pair(I->getBB(), Addr)); + if (!I->getResult().isNonLocal()) + Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr)); + } + ++NumCacheCompleteNonLocalPtr; + return false; + } + + // Otherwise, either this is a new block, a block with an invalid cache + // pointer or one that we're about to invalidate by putting more info into it + // than its valid cache info. 
If empty, the result will be valid cache info, + // otherwise it isn't. + if (Cache->empty()) + CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); + else + CacheInfo->Pair = BBSkipFirstBlockPair(); + + SmallVector<BasicBlock*, 32> Worklist; + Worklist.push_back(StartBB); + + // PredList used inside loop. + SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; + + // Keep track of the entries that we know are sorted. Previously cached + // entries will all be sorted. The entries we add we only sort on demand (we + // don't insert every element into its sorted position). We know that we + // won't get any reuse from currently inserted values, because we don't + // revisit blocks after we insert info for them. + unsigned NumSortedEntries = Cache->size(); + DEBUG(AssertSorted(*Cache)); + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Skip the first block if we have it. + if (!SkipFirstBlock) { + // Analyze the dependency of *Pointer in FromBB. See if we already have + // been here. + assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); + + // Get the dependency info for Pointer in BB. If we have cached + // information, we will use it, otherwise we compute it. + DEBUG(AssertSorted(*Cache, NumSortedEntries)); + MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + NumSortedEntries); + + // If we got a Def or Clobber, add this to the list of results. + if (!Dep.isNonLocal()) { + Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); + continue; + } + } + + // If 'Pointer' is an instruction defined in this block, then we need to do + // phi translation to change it into a value live in the predecessor block. + // If not, we just add the predecessors to the worklist and scan them with + // the same Pointer. + if (!Pointer.NeedsPHITranslationFromBlock(BB)) { + SkipFirstBlock = false; + SmallVector<BasicBlock*, 16> NewBlocks; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + // Verify that we haven't looked at this block yet. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); + if (InsertRes.second) { + // First time we've looked at *PI. + NewBlocks.push_back(*PI); + continue; + } + + // If we have seen this block before, but it was with a different + // pointer then we have a phi translation failure and we have to treat + // this as a clobber. + if (InsertRes.first->second != Pointer.getAddr()) { + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0; i < NewBlocks.size(); i++) + Visited.erase(NewBlocks[i]); + goto PredTranslationFailure; + } + } + Worklist.append(NewBlocks.begin(), NewBlocks.end()); + continue; + } + + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. + if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. 
+ if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } + Cache = 0; + + PredList.clear(); + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + PredList.push_back(std::make_pair(Pred, Pointer)); + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr &PredPointer = PredList.back().second; + PredPointer.PHITranslateValue(BB, Pred, 0); + + Value *PredPtrVal = PredPointer.getAddr(); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. + std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); + + if (!InsertRes.second) { + // We found the pred; take it off the list of preds to visit. + PredList.pop_back(); + + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtrVal) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. + + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0; i < PredList.size(); i++) + Visited.erase(PredList[i].first); + + goto PredTranslationFailure; + } + } + + // Actually process results here; this need to be a separate loop to avoid + // calling getNonLocalPointerDepFromBB for blocks we don't want to return + // any results for. (getNonLocalPointerDepFromBB will modify our + // datastructures in ways the code after the PredTranslationFailure label + // doesn't expect.) + for (unsigned i = 0; i < PredList.size(); i++) { + BasicBlock *Pred = PredList[i].first; + PHITransAddr &PredPointer = PredList[i].second; + Value *PredPtrVal = PredPointer.getAddr(); + + bool CanTranslate = true; + // If PHI translation was unable to find an available pointer in this + // predecessor, then we have to assume that the pointer is clobbered in + // that predecessor. We can still do PRE of the load, which would insert + // a computation of the pointer in this predecessor. + if (PredPtrVal == 0) + CanTranslate = false; + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If getNonLocalPointerDepFromBB fails here, that means the cached + // result conflicted with the Visited list; we have to conservatively + // assume it is unknown, but this also does not block PRE of the load. + if (!CanTranslate || + getNonLocalPointerDepFromBB(PredPointer, + Loc.getWithNewPtr(PredPtrVal), + isLoad, Pred, + Result, Visited)) { + // Add the entry to the Result list. + NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); + Result.push_back(Entry); + + // Since we had a phi translation failure, the cache for CacheKey won't + // include all of the entries that we need to immediately satisfy future + // queries. 
Mark this in NonLocalPointerDeps by setting the + // BBSkipFirstBlockPair pointer to null. This requires reuse of the + // cached value to do more work but not miss the phi trans failure. + NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; + NLPI.Pair = BBSkipFirstBlockPair(); + continue; + } + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + + PredTranslationFailure: + // The following code is "failure"; we can't produce a sane translation + // for the given block. It assumes that we haven't modified any of + // our datastructures while processing the current block. + + if (Cache == 0) { + // Refresh the CacheInfo/Cache pointer if it got invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + } + + // Since we failed phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set". Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + + // If *nothing* works, mark the pointer as unknown. + // + // If this is the magic first block, return this as a clobber of the whole + // incoming value. Since we can't phi translate to one of the predecessors, + // we have to bail out. + if (SkipFirstBlock) + return true; + + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { + assert(I != Cache->rend() && "Didn't find current block??"); + if (I->getBB() != BB) + continue; + + assert(I->getResult().isNonLocal() && + "Should only be here with transparent block"); + I->setResult(MemDepResult::getUnknown()); + Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), + Pointer.getAddr())); + break; + } + } + + // Okay, we're done now. If we added new values to the cache, re-sort it. + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + DEBUG(AssertSorted(*Cache)); + return false; +} + +/// RemoveCachedNonLocalPointerDependencies - If P exists in +/// CachedNonLocalPointerInfo, remove it. +void MemoryDependenceAnalysis:: +RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { + CachedNonLocalPointerInfo::iterator It = + NonLocalPointerDeps.find(P); + if (It == NonLocalPointerDeps.end()) return; + + // Remove all of the entries in the BB->val map. This involves removing + // instructions from the reverse map. + NonLocalDepInfo &PInfo = It->second.NonLocalDeps; + + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { + Instruction *Target = PInfo[i].getResult().getInst(); + if (Target == 0) continue; // Ignore non-local dep results. + assert(Target->getParent() == PInfo[i].getBB()); + + // Eliminating the dirty entry from 'Cache', so update the reverse info. + RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); + } + + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). 
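+  // (Erasing the map entry also destroys the per-block NonLocalDepInfo vector,
+  // so any later non-local query for P starts from an empty cache.)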
+ NonLocalPointerDeps.erase(It); +} + + +/// invalidateCachedPointerInfo - This method is used to invalidate cached +/// information about the specified pointer, because it may be too +/// conservative in memdep. This is an optional call that can be used when +/// the client detects an equivalence between the pointer and some other +/// value and replaces the other value with ptr. This can make Ptr available +/// in more places that cached info does not necessarily keep. +void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { + // If Ptr isn't really a pointer, just ignore it. + if (!Ptr->getType()->isPointerTy()) return; + // Flush store info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); + // Flush load info for the pointer. + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); +} + +/// invalidateCachedPredecessors - Clear the PredIteratorCache info. +/// This needs to be done when the CFG changes, e.g., due to splitting +/// critical edges. +void MemoryDependenceAnalysis::invalidateCachedPredecessors() { + PredCache->clear(); +} + +/// removeInstruction - Remove an instruction from the dependence analysis, +/// updating the dependence of instructions that previously depended on it. +/// This method attempts to keep the cache coherent using the reverse map. +void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { + // Walk through the Non-local dependencies, removing this one as the value + // for any cached queries. + NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); + if (NLDI != NonLocalDeps.end()) { + NonLocalDepInfo &BlockMap = NLDI->second.first; + for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); + DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); + NonLocalDeps.erase(NLDI); + } + + // If we have a cached local dependence query for this instruction, remove it. + // + LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); + if (LocalDepEntry != LocalDeps.end()) { + // Remove us from DepInst's reverse set now that the local dep info is gone. + if (Instruction *Inst = LocalDepEntry->second.getInst()) + RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); + + // Remove this local dependency info. + LocalDeps.erase(LocalDepEntry); + } + + // If we have any cached pointer dependencies on this instruction, remove + // them. If the instruction has non-pointer type, then it can't be a pointer + // base. + + // Remove it from both the load info and the store info. The instruction + // can't be in either of these maps if it is non-pointer. + if (RemInst->getType()->isPointerTy()) { + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } + + // Loop over all of the things that depend on the instruction we're removing. + // + SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; + + // If we find RemInst as a clobber or Def in any of the maps for other values, + // we need to replace its entry with a dirty version of the instruction after + // it. If RemInst is a terminator, we use a null dirty value. + // + // Using a dirty version of the instruction after RemInst saves having to scan + // the entire block to get to this point. 
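+  // For illustration: if a cached result had resolved to RemInst in a block
+  // "... RemInst; I1; I2 ..." (hypothetical names), that entry is repointed at
+  // a dirty result for I1 below, so a later re-query rescans only from I1
+  // upward rather than from the end of the block.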
+ MemDepResult NewDirtyVal; + if (!RemInst->isTerminator()) + NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; + // RemInst can't be the terminator if it has local stuff depending on it. + assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && + "Nothing can locally depend on a terminator"); + + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), + E = ReverseDeps.end(); I != E; ++I) { + Instruction *InstDependingOnRemInst = *I; + assert(InstDependingOnRemInst != RemInst && + "Already removed our local dep info"); + + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; + + // Make sure to remember that new things depend on NewDepInst. + assert(NewDirtyVal.getInst() && "There is no way something else can have " + "a local dep on this if it is a terminator!"); + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + InstDependingOnRemInst)); + } + + ReverseLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating the + // 'ReverseDeps' reference. + while (!ReverseDepsToAdd.empty()) { + ReverseLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseNonLocalDeps.end()) { + SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; + for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); + I != E; ++I) { + assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + + PerInstNLInfo &INLD = NonLocalDeps[*I]; + // The information is now dirty! + INLD.second = true; + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NextI = NewDirtyVal.getInst()) + ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + } + } + + ReverseNonLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating 'Set' + while (!ReverseDepsToAdd.empty()) { + ReverseNonLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a + // value in the NonLocalPointerDeps info. + ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = + ReverseNonLocalPtrDeps.find(RemInst); + if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { + SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; + SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; + + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + ValueIsLoadPair P = *I; + assert(P.getPointer() != RemInst && + "Already removed NonLocalPointerDeps info for RemInst"); + + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; + + // The cache is not valid for any specific block anymore. + NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); + + // Update any entries for RemInst to use the instruction after it. 
+ for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); + DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) + ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); + } + + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its + // subsequent value may invalidate the sortedness. + std::sort(NLPDI.begin(), NLPDI.end()); + } + + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); + + while (!ReversePtrDepsToAdd.empty()) { + ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] + .insert(ReversePtrDepsToAdd.back().second); + ReversePtrDepsToAdd.pop_back(); + } + } + + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); + AA->deleteValue(RemInst); + DEBUG(verifyRemoved(RemInst)); +} +/// verifyRemoved - Verify that the specified instruction does not occur +/// in our internal data structures. +void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { + for (LocalDepMapType::const_iterator I = LocalDeps.begin(), + E = LocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + assert(I->second.getInst() != D && + "Inst occurs in data structures"); + } + + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), + E = NonLocalPointerDeps.end(); I != E; ++I) { + assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); + const NonLocalDepInfo &Val = I->second.NonLocalDeps; + for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); + II != E; ++II) + assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); + } + + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), + E = NonLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + const PerInstNLInfo &INLD = I->second; + for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), + EE = INLD.first.end(); II != EE; ++II) + assert(II->getResult().getInst() != D && "Inst occurs in data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), + E = ReverseLocalDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in data structures"); + } + + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), + E = ReverseNonLocalDeps.end(); + I != E; ++I) { + assert(I->first != D && "Inst occurs in data structures"); + for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(), + EE = I->second.end(); II != EE; ++II) + assert(*II != D && "Inst occurs in data structures"); + } + + for (ReverseNonLocalPtrDepTy::const_iterator + I = ReverseNonLocalPtrDeps.begin(), + E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { + assert(I->first != D && "Inst occurs in rev NLPD map"); + + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), + E = I->second.end(); II != E; ++II) + assert(*II != ValueIsLoadPair(D, false) && + *II != ValueIsLoadPair(D, true) && + "Inst occurs in ReverseNonLocalPtrDeps map"); + } + +} diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp new file mode 100644 index 0000000..e7e999c --- /dev/null +++ 
b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -0,0 +1,87 @@ +//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass decodes the debug info metadata in a module and prints in a +// (sufficiently-prepared-) human-readable form. +// +// For example, run this pass from opt along with the -analyze option, and +// it'll print to standard output. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +namespace { + class ModuleDebugInfoPrinter : public ModulePass { + DebugInfoFinder Finder; + public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoPrinter() : ModulePass(ID) { + initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const; + }; +} + +char ModuleDebugInfoPrinter::ID = 0; +INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", + "Decodes module-level debug info", false, true) + +ModulePass *llvm::createModuleDebugInfoPrinterPass() { + return new ModuleDebugInfoPrinter(); +} + +bool ModuleDebugInfoPrinter::runOnModule(Module &M) { + Finder.processModule(M); + return false; +} + +void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) { + O << "Compile Unit: "; + DICompileUnit(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) { + O << "Subprogram: "; + DISubprogram(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) { + O << "GlobalVariable: "; + DIGlobalVariable(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) { + O << "Type: "; + DIType(*I).print(O); + O << '\n'; + } +} diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp new file mode 100644 index 0000000..101c2d5 --- /dev/null +++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp @@ -0,0 +1,88 @@ +//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default implementation of the Alias Analysis interface +// that simply returns "I don't know" for all queries. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +namespace { + /// NoAA - This class implements the -no-aa pass, which always returns "I + /// don't know" for alias queries. NoAA is unlike other alias analysis + /// implementations, in that it does not chain to a previous analysis. As + /// such it doesn't follow many of the rules that other alias analyses must. + /// + struct NoAA : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + NoAA() : ImmutablePass(ID) { + initializeNoAAPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + + virtual void initializePass() { + // Note: NoAA does not call InitializeAliasAnalysis because it's + // special and does not support chaining. + TD = getAnalysisIfAvailable<TargetData>(); + } + + virtual AliasResult alias(const Location &LocA, const Location &LocB) { + return MayAlias; + } + + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; + } + + virtual bool pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + return false; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + return ModRef; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return ModRef; + } + + virtual void deleteValue(Value *V) {} + virtual void copyValue(Value *From, Value *To) {} + virtual void addEscapingUse(Use &U) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + }; +} // End of anonymous namespace + +// Register this pass... +char NoAA::ID = 0; +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, true) + +ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 0000000..70dcd0d --- /dev/null +++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,442 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. 
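+// For example (illustrative IR): given
+//   %a = phi i8* [ %x, %bb1 ], [ %y, %bb2 ]
+//   %p = getelementptr i8* %a, i64 4
+// in a block, PHI-translating %p into predecessor %bb1 produces the equivalent
+// address "getelementptr i8* %x, i64 4" valid in that predecessor.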
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static bool CanPHITrans(Instruction *Inst) { + if (isa<PHINode>(Inst) || + isa<GetElementPtrInst>(Inst)) + return true; + + if (isa<CastInst>(Inst) && + Inst->isSafeToSpeculativelyExecute()) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) + return true; + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +void PHITransAddr::dump() const { + if (Addr == 0) { + dbgs() << "PHITransAddr: null\n"; + return; + } + dbgs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} + + +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl<Instruction*> &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast<Instruction>(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in Tmp or its operands recursively + // are. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr:\n"; + errs() << *I << '\n'; + llvm_unreachable("Either something is missing from InstInputs or " + "CanPHITrans is wrong."); + return false; + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + if (!Tmp.empty()) { + errs() << "PHITransAddr contains extra instructions:\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + llvm_unreachable("This is unexpected."); + return false; + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. 
+ Instruction *Inst = dyn_cast<Instruction>(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl<Instruction*> &InstInputs) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return; + + // If the instruction is in the InstInputs list, remove it. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return; + } + + assert(!isa<PHINode>(I) && "Error, removing something that isn't an input"); + + // Otherwise, it must have instruction inputs itself. Zap them recursively. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i))) + RemoveInstInputs(Op, InstInputs); + } +} + +Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, + BasicBlock *PredBB, + const DominatorTree *DT) { + // If this is a non-instruction value, it can't require PHI translation. + Instruction *Inst = dyn_cast<Instruction>(V); + if (Inst == 0) return V; + + // Determine whether 'Inst' is an input to our PHI translatable expression. + bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + + // Handle inputs instructions if needed. + if (isInput) { + if (Inst->getParent() != CurBB) { + // If it is an input defined in a different block, then it remains an + // input. + return Inst; + } + + // If 'Inst' is defined in this block and is an input that needs to be phi + // translated, we need to incorporate the value into the expression or fail. + + // In either case, the instruction itself isn't an input any longer. + InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); + + // If this is a PHI, go ahead and translate it. + if (PHINode *PN = dyn_cast<PHINode>(Inst)) + return AddAsInput(PN->getIncomingValueForBlock(PredBB)); + + // If this is a non-phi value, and it is analyzable, we can incorporate it + // into the expression by making all instruction operands be inputs. + if (!CanPHITrans(Inst)) + return 0; + + // All instruction operands are now inputs (and of course, they may also be + // defined in this block, so they may need to be phi translated themselves. + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i))) + InstInputs.push_back(Op); + } + + // Ok, it must be an intermediate result (either because it started that way + // or because we just incorporated it into the expression). See if its + // operands need to be phi translated, and if so, reconstruct it. + + if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!Cast->isSafeToSpeculativelyExecute()) return 0; + Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); + if (PHIIn == 0) return 0; + if (PHIIn == Cast->getOperand(0)) + return Cast; + + // Find an available version of this cast. + + // Constants are trivial to find. + if (Constant *C = dyn_cast<Constant>(PHIIn)) + return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(), + C, Cast->getType())); + + // Otherwise we have to see if a casted version of the incoming pointer + // is available. If so, we can use it, otherwise we have to fail. 
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); + UI != E; ++UI) { + if (CastInst *CastI = dyn_cast<CastInst>(*UI)) + if (CastI->getOpcode() == Cast->getOpcode() && + CastI->getType() == Cast->getType() && + (!DT || DT->dominates(CastI->getParent(), PredBB))) + return CastI; + } + return 0; + } + + // Handle getelementptr with at least one PHI translatable operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); + bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); + bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere. 
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to Pred, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. +bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT) { + assert(Verify() && "Invalid PHITransAddr!"); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + assert(Verify() && "Invalid PHITransAddr!"); + + if (DT) { + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) + if (!DT->dominates(Inst->getParent(), PredBB)) + Addr = 0; + } + + return Addr == 0; +} + +/// PHITranslateWithInsertion - PHI translate this value into the specified +/// predecessor block, inserting a computation of the value if it is +/// unavailable. +/// +/// All newly created instructions are added to the NewInsts list. This +/// returns null on failure. +/// +Value *PHITransAddr:: +PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + unsigned NISize = NewInsts.size(); + + // Attempt to PHI translate with insertion. + Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); + + // If successful, return the new value. + if (Addr) return Addr; + + // If not, destroy any intermediate instructions inserted. + while (NewInsts.size() != NISize) + NewInsts.pop_back_val()->eraseFromParent(); + return 0; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. All newly created instructions are added to the NewInsts list. +/// This returns null on failure. +/// +Value *PHITransAddr:: +InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new instance of it. + PHITransAddr Tmp(InVal, TD); + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + return Tmp.getAddr(); + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast<Instruction>(InVal); + + // Handle cast of PHI translatable value. + if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { + if (!Cast->isSafeToSpeculativelyExecute()) return 0; + Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a cast at the end of PredBB. + CastInst *New = CastInst::Create(Cast->getOpcode(), + OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. 
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp new file mode 100644 index 0000000..7c584da --- /dev/null +++ b/contrib/llvm/lib/Analysis/PathNumbering.cpp @@ -0,0 +1,522 @@ +//===- PathNumbering.cpp --------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Ball-Larus path numbers uniquely identify paths through a directed acyclic +// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony +// edges to obtain a DAG, and thus the unique path numbers [Ball96]. +// +// The purpose of this analysis is to enumerate the edges in a CFG in order +// to obtain paths from path numbers in a convenient manner. As described in +// [Ball96] edges can be enumerated such that given a path number by following +// the CFG and updating the path number, the path is obtained. +// +// [Ball96] +// T. Ball and J. R. Larus. "Efficient Path Profiling." +// International Symposium on Microarchitecture, pages 46-57, 1996. 
+// http://portal.acm.org/citation.cfm?id=243857 +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ball-larus-numbering" + +#include "llvm/Analysis/PathNumbering.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include <queue> +#include <stack> +#include <string> +#include <utility> +#include <sstream> + +using namespace llvm; + +// Are we enabling early termination +static cl::opt<bool> ProcessEarlyTermination( + "path-profile-early-termination", cl::Hidden, + cl::desc("In path profiling, insert extra instrumentation to account for " + "unexpected function termination.")); + +// Returns the basic block for the BallLarusNode +BasicBlock* BallLarusNode::getBlock() { + return(_basicBlock); +} + +// Returns the number of paths to the exit starting at the node. +unsigned BallLarusNode::getNumberPaths() { + return(_numberPaths); +} + +// Sets the number of paths to the exit starting at the node. +void BallLarusNode::setNumberPaths(unsigned numberPaths) { + _numberPaths = numberPaths; +} + +// Gets the NodeColor used in graph algorithms. +BallLarusNode::NodeColor BallLarusNode::getColor() { + return(_color); +} + +// Sets the NodeColor used in graph algorithms. +void BallLarusNode::setColor(BallLarusNode::NodeColor color) { + _color = color; +} + +// Returns an iterator over predecessor edges. Includes phony and +// backedges. +BLEdgeIterator BallLarusNode::predBegin() { + return(_predEdges.begin()); +} + +// Returns the end sentinel for the predecessor iterator. +BLEdgeIterator BallLarusNode::predEnd() { + return(_predEdges.end()); +} + +// Returns the number of predecessor edges. Includes phony and +// backedges. +unsigned BallLarusNode::getNumberPredEdges() { + return(_predEdges.size()); +} + +// Returns an iterator over successor edges. Includes phony and +// backedges. +BLEdgeIterator BallLarusNode::succBegin() { + return(_succEdges.begin()); +} + +// Returns the end sentinel for the successor iterator. +BLEdgeIterator BallLarusNode::succEnd() { + return(_succEdges.end()); +} + +// Returns the number of successor edges. Includes phony and +// backedges. +unsigned BallLarusNode::getNumberSuccEdges() { + return(_succEdges.size()); +} + +// Add an edge to the predecessor list. +void BallLarusNode::addPredEdge(BallLarusEdge* edge) { + _predEdges.push_back(edge); +} + +// Remove an edge from the predecessor list. +void BallLarusNode::removePredEdge(BallLarusEdge* edge) { + removeEdge(_predEdges, edge); +} + +// Add an edge to the successor list. +void BallLarusNode::addSuccEdge(BallLarusEdge* edge) { + _succEdges.push_back(edge); +} + +// Remove an edge from the successor list. +void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) { + removeEdge(_succEdges, edge); +} + +// Returns the name of the BasicBlock being represented. If BasicBlock +// is null then returns "<null>". If BasicBlock has no name, then +// "<unnamed>" is returned. Intended for use with debug output. 
+std::string BallLarusNode::getName() { + std::stringstream name; + + if(getBlock() != NULL) { + if(getBlock()->hasName()) { + std::string tempName(getBlock()->getName()); + name << tempName.c_str() << " (" << _uid << ")"; + } else + name << "<unnamed> (" << _uid << ")"; + } else + name << "<null> (" << _uid << ")"; + + return name.str(); +} + +// Removes an edge from an edgeVector. Used by removePredEdge and +// removeSuccEdge. +void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) { + // TODO: Avoid linear scan by using a set instead + for(BLEdgeIterator i = v.begin(), + end = v.end(); + i != end; + ++i) { + if((*i) == e) { + v.erase(i); + break; + } + } +} + +// Returns the source node of this edge. +BallLarusNode* BallLarusEdge::getSource() const { + return(_source); +} + +// Returns the target node of this edge. +BallLarusNode* BallLarusEdge::getTarget() const { + return(_target); +} + +// Sets the type of the edge. +BallLarusEdge::EdgeType BallLarusEdge::getType() const { + return _edgeType; +} + +// Gets the type of the edge. +void BallLarusEdge::setType(EdgeType type) { + _edgeType = type; +} + +// Returns the weight of this edge. Used to decode path numbers to sequences +// of basic blocks. +unsigned BallLarusEdge::getWeight() { + return(_weight); +} + +// Sets the weight of the edge. Used during path numbering. +void BallLarusEdge::setWeight(unsigned weight) { + _weight = weight; +} + +// Gets the phony edge originating at the root. +BallLarusEdge* BallLarusEdge::getPhonyRoot() { + return _phonyRoot; +} + +// Sets the phony edge originating at the root. +void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) { + _phonyRoot = phonyRoot; +} + +// Gets the phony edge terminating at the exit. +BallLarusEdge* BallLarusEdge::getPhonyExit() { + return _phonyExit; +} + +// Sets the phony edge terminating at the exit. +void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) { + _phonyExit = phonyExit; +} + +// Gets the associated real edge if this is a phony edge. +BallLarusEdge* BallLarusEdge::getRealEdge() { + return _realEdge; +} + +// Sets the associated real edge if this is a phony edge. +void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) { + _realEdge = realEdge; +} + +// Returns the duplicate number of the edge. +unsigned BallLarusEdge::getDuplicateNumber() { + return(_duplicateNumber); +} + +// Initialization that requires virtual functions which are not fully +// functional in the constructor. +void BallLarusDag::init() { + BLBlockNodeMap inDag; + std::stack<BallLarusNode*> dfsStack; + + _root = addNode(&(_function.getEntryBlock())); + _exit = addNode(NULL); + + // start search from root + dfsStack.push(getRoot()); + + // dfs to add each bb into the dag + while(dfsStack.size()) + buildNode(inDag, dfsStack); + + // put in the final edge + addEdge(getExit(),getRoot(),0); +} + +// Frees all memory associated with the DAG. +BallLarusDag::~BallLarusDag() { + for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end; + ++edge) + delete (*edge); + + for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end; + ++node) + delete (*node); +} + +// Calculate the path numbers by assigning edge increments as prescribed +// in Ball-Larus path profiling. 
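+//
+// Small illustrative example: for a diamond CFG A -> {B, C}, B -> D, C -> D
+// with D -> EXIT, this gives numPaths(D) = numPaths(B) = numPaths(C) = 1 and
+// numPaths(A) = 2; one of A's two outgoing edges receives increment 1 and all
+// other edges receive 0, so the two root-to-exit paths decode to the distinct
+// path numbers 0 and 1.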
+void BallLarusDag::calculatePathNumbers() { + BallLarusNode* node; + std::queue<BallLarusNode*> bfsQueue; + bfsQueue.push(getExit()); + + while(bfsQueue.size() > 0) { + node = bfsQueue.front(); + + DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n"); + + bfsQueue.pop(); + unsigned prevPathNumber = node->getNumberPaths(); + calculatePathNumbersFrom(node); + + // Check for DAG splitting + if( node->getNumberPaths() > 100000000 && node != getRoot() ) { + // Add new phony edge from the split-node to the DAG's exit + BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); + exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); + + // Counters to handle the possibility of a multi-graph + BasicBlock* oldTarget = 0; + unsigned duplicateNumber = 0; + + // Iterate through each successor edge, adding phony edges + for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); + succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) { + + if( (*succ)->getType() == BallLarusEdge::NORMAL ) { + // is this edge a duplicate? + if( oldTarget != (*succ)->getTarget()->getBlock() ) + duplicateNumber = 0; + + // create the new phony edge: root -> succ + BallLarusEdge* rootEdge = + addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++); + rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); + rootEdge->setRealEdge(*succ); + + // split on this edge and reference it's exit/root phony edges + (*succ)->setType(BallLarusEdge::SPLITEDGE); + (*succ)->setPhonyRoot(rootEdge); + (*succ)->setPhonyExit(exitEdge); + (*succ)->setWeight(0); + } + } + + calculatePathNumbersFrom(node); + } + + DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", " + << node->getNumberPaths() << ".\n"); + + if(prevPathNumber == 0 && node->getNumberPaths() != 0) { + DEBUG(dbgs() << "node ready : " << node->getName() << "\n"); + for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd(); + pred != end; pred++) { + if( (*pred)->getType() == BallLarusEdge::BACKEDGE || + (*pred)->getType() == BallLarusEdge::SPLITEDGE ) + continue; + + BallLarusNode* nextNode = (*pred)->getSource(); + // not yet visited? + if(nextNode->getNumberPaths() == 0) + bfsQueue.push(nextNode); + } + } + } + + DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n"); +} + +// Returns the number of paths for the Dag. +unsigned BallLarusDag::getNumberOfPaths() { + return(getRoot()->getNumberPaths()); +} + +// Returns the root (i.e. entry) node for the DAG. +BallLarusNode* BallLarusDag::getRoot() { + return _root; +} + +// Returns the exit node for the DAG. +BallLarusNode* BallLarusDag::getExit() { + return _exit; +} + +// Returns the function for the DAG. +Function& BallLarusDag::getFunction() { + return(_function); +} + +// Clears the node colors. +void BallLarusDag::clearColors(BallLarusNode::NodeColor color) { + for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++) + (*nodeIt)->setColor(color); +} + +// Processes one node and its imediate edges for building the DAG. +void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { + BallLarusNode* currentNode = dfsStack.top(); + BasicBlock* currentBlock = currentNode->getBlock(); + + if(currentNode->getColor() != BallLarusNode::WHITE) { + // we have already visited this node + dfsStack.pop(); + currentNode->setColor(BallLarusNode::BLACK); + } else { + // are there any external procedure calls? 
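+    // With -path-profile-early-termination enabled, a call may leave the
+    // function without reaching a return (e.g. by calling exit()), so a
+    // phony edge to the exit node is added for the first call instruction
+    // found in this block.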
+ if( ProcessEarlyTermination ) { + for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(), + bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd; + bbCurrent++ ) { + Instruction& instr = *bbCurrent; + if( instr.getOpcode() == Instruction::Call ) { + BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0); + callEdge->setType(BallLarusEdge::CALLEDGE_PHONY); + break; + } + } + } + + TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); + if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) + || isa<UnwindInst>(terminator)) + addEdge(currentNode, getExit(),0); + + currentNode->setColor(BallLarusNode::GRAY); + inDag[currentBlock] = currentNode; + + BasicBlock* oldSuccessor = 0; + unsigned duplicateNumber = 0; + + // iterate through this node's successors + for(succ_iterator successor = succ_begin(currentBlock), + succEnd = succ_end(currentBlock); successor != succEnd; + oldSuccessor = *successor, ++successor ) { + BasicBlock* succBB = *successor; + + // is this edge a duplicate? + if (oldSuccessor == succBB) + duplicateNumber++; + else + duplicateNumber = 0; + + buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber); + } + } +} + +// Process an edge in the CFG for DAG building. +void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& + dfsStack, BallLarusNode* currentNode, + BasicBlock* succBB, unsigned duplicateCount) { + BallLarusNode* succNode = inDag[succBB]; + + if(succNode && succNode->getColor() == BallLarusNode::BLACK) { + // visited node and forward edge + addEdge(currentNode, succNode, duplicateCount); + } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) { + // visited node and back edge + DEBUG(dbgs() << "Backedge detected.\n"); + addBackedge(currentNode, succNode, duplicateCount); + } else { + BallLarusNode* childNode; + // not visited node and forward edge + if(succNode) // an unvisited node that is child of a gray node + childNode = succNode; + else { // an unvisited node that is a child of a an unvisted node + childNode = addNode(succBB); + inDag[succBB] = childNode; + } + addEdge(currentNode, childNode, duplicateCount); + dfsStack.push(childNode); + } +} + +// The weight on each edge is the increment required along any path that +// contains that edge. +void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) { + if(node == getExit()) + // The Exit node must be base case + node->setNumberPaths(1); + else { + unsigned sumPaths = 0; + BallLarusNode* succNode; + + for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); + succ != end; succ++) { + if( (*succ)->getType() == BallLarusEdge::BACKEDGE || + (*succ)->getType() == BallLarusEdge::SPLITEDGE ) + continue; + + (*succ)->setWeight(sumPaths); + succNode = (*succ)->getTarget(); + + if( !succNode->getNumberPaths() ) + return; + sumPaths += succNode->getNumberPaths(); + } + + node->setNumberPaths(sumPaths); + } +} + +// Allows subclasses to determine which type of Node is created. +// Override this method to produce subclasses of BallLarusNode if +// necessary. The destructor of BallLarusDag will call free on each +// pointer created. +BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) { + return( new BallLarusNode(BB) ); +} + +// Allows subclasses to determine which type of Edge is created. +// Override this method to produce subclasses of BallLarusEdge if +// necessary. The destructor of BallLarusDag will call free on each +// pointer created. 
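+// (For instance, a subclass that needs extra per-edge state for
+// instrumentation can return its own BallLarusEdge subclass here, as long as
+// it is constructible from the source/target/duplicate-count arguments used
+// below.)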
+BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source, + BallLarusNode* target, + unsigned duplicateCount) { + return( new BallLarusEdge(source, target, duplicateCount) ); +} + +// Proxy to node's constructor. Updates the DAG state. +BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) { + BallLarusNode* newNode = createNode(BB); + _nodes.push_back(newNode); + return( newNode ); +} + +// Proxy to edge's constructor. Updates the DAG state. +BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source, + BallLarusNode* target, + unsigned duplicateCount) { + BallLarusEdge* newEdge = createEdge(source, target, duplicateCount); + _edges.push_back(newEdge); + source->addSuccEdge(newEdge); + target->addPredEdge(newEdge); + return(newEdge); +} + +// Adds a backedge with its phony edges. Updates the DAG state. +void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target, + unsigned duplicateCount) { + BallLarusEdge* childEdge = addEdge(source, target, duplicateCount); + childEdge->setType(BallLarusEdge::BACKEDGE); + + childEdge->setPhonyRoot(addEdge(getRoot(), target,0)); + childEdge->setPhonyExit(addEdge(source, getExit(),0)); + + childEdge->getPhonyRoot()->setRealEdge(childEdge); + childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY); + + childEdge->getPhonyExit()->setRealEdge(childEdge); + childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY); + _backEdges.push_back(childEdge); +} diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp new file mode 100644 index 0000000..b361d3f --- /dev/null +++ b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp @@ -0,0 +1,434 @@ +//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface used by optimizers to load path profiles, +// and provides a loader pass which reads a path profile file. 
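+//
+// The profile file read by the loader is a sequence of records, each
+// introduced by a ProfilingType tag: ArgumentInfo records carry the command
+// line the profiled program was run with, and PathInfo records carry, per
+// function, a header identifying the function and the number of table
+// entries, followed by entries pairing a path number with its execution
+// count; see handleArgumentInfo() and handlePathInfo() below.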
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "path-profile-info" + +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <cstdio> + +using namespace llvm; + +// command line option for loading path profiles +static cl::opt<std::string> +PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden); + +namespace { + class PathProfileLoaderPass : public ModulePass, public PathProfileInfo { + public: + PathProfileLoaderPass() : ModulePass(ID) { } + ~PathProfileLoaderPass(); + + // this pass doesn't change anything (only loads information) + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + // the full name of the loader pass + virtual const char* getPassName() const { + return "Path Profiling Information Loader"; + } + + // required since this pass implements multiple inheritance + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &PathProfileInfo::ID) + return (PathProfileInfo*)this; + return this; + } + + // entry point to run the pass + bool runOnModule(Module &M); + + // pass identification + static char ID; + + private: + // make a reference table to refer to function by number + void buildFunctionRefs(Module &M); + + // process argument info of a program from the input file + void handleArgumentInfo(); + + // process path number information from the input file + void handlePathInfo(); + + // array of references to the functions in the module + std::vector<Function*> _functions; + + // path profile file handle + FILE* _file; + + // path profile file name + std::string _filename; + }; +} + +// register PathLoader +char PathProfileLoaderPass::ID = 0; + +INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information", + NoPathProfileInfo) +INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo, + "path-profile-loader", + "Load path profile information from file", + false, true, false) + +char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID; + +// link PathLoader as a pass, and make it available as an optimisation +ModulePass *llvm::createPathProfileLoaderPass() { + return new PathProfileLoaderPass; +} + +// ---------------------------------------------------------------------------- +// PathEdge implementation +// +ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target, + unsigned duplicateNumber) + : _source(source), _target(target), _duplicateNumber(duplicateNumber) {} + +// ---------------------------------------------------------------------------- +// Path implementation +// + +ProfilePath::ProfilePath (unsigned int number, unsigned int count, + double countStdDev, PathProfileInfo* ppi) + : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {} + +double ProfilePath::getFrequency() const { + return 100 * double(_count) / + double(_ppi->_functionPathCounts[_ppi->_currentFunction]); +} + +static BallLarusEdge* getNextEdge (BallLarusNode* node, + unsigned int pathNumber) { + BallLarusEdge* best = 0; + + for( BLEdgeIterator next = node->succBegin(), + end = node->succEnd(); next != end; next++ ) { + if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no 
backedges + (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges + (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber + (!best || (best->getWeight() < (*next)->getWeight())) ) // best one? + best = *next; + } + + return best; +} + +ProfilePathEdgeVector* ProfilePath::getPathEdges() const { + BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); + unsigned int increment = _number; + ProfilePathEdgeVector* pev = new ProfilePathEdgeVector; + + while (currentNode != _ppi->_currentDag->getExit()) { + BallLarusEdge* next = getNextEdge(currentNode, increment); + + increment -= next->getWeight(); + + if( next->getType() != BallLarusEdge::BACKEDGE_PHONY && + next->getType() != BallLarusEdge::SPLITEDGE_PHONY && + next->getTarget() != _ppi->_currentDag->getExit() ) + pev->push_back(ProfilePathEdge( + next->getSource()->getBlock(), + next->getTarget()->getBlock(), + next->getDuplicateNumber())); + + if( next->getType() == BallLarusEdge::BACKEDGE_PHONY && + next->getTarget() == _ppi->_currentDag->getExit() ) + pev->push_back(ProfilePathEdge( + next->getRealEdge()->getSource()->getBlock(), + next->getRealEdge()->getTarget()->getBlock(), + next->getDuplicateNumber())); + + if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY && + next->getSource() == _ppi->_currentDag->getRoot() ) + pev->push_back(ProfilePathEdge( + next->getRealEdge()->getSource()->getBlock(), + next->getRealEdge()->getTarget()->getBlock(), + next->getDuplicateNumber())); + + // set the new node + currentNode = next->getTarget(); + } + + return pev; +} + +ProfilePathBlockVector* ProfilePath::getPathBlocks() const { + BallLarusNode* currentNode = _ppi->_currentDag->getRoot (); + unsigned int increment = _number; + ProfilePathBlockVector* pbv = new ProfilePathBlockVector; + + while (currentNode != _ppi->_currentDag->getExit()) { + BallLarusEdge* next = getNextEdge(currentNode, increment); + increment -= next->getWeight(); + + // add block to the block list if it is a real edge + if( next->getType() == BallLarusEdge::NORMAL) + pbv->push_back (currentNode->getBlock()); + // make the back edge the last edge since we are at the end + else if( next->getTarget() == _ppi->_currentDag->getExit() ) { + pbv->push_back (currentNode->getBlock()); + pbv->push_back (next->getRealEdge()->getTarget()->getBlock()); + } + + // set the new node + currentNode = next->getTarget(); + } + + return pbv; +} + +BasicBlock* ProfilePath::getFirstBlockInPath() const { + BallLarusNode* root = _ppi->_currentDag->getRoot(); + BallLarusEdge* edge = getNextEdge(root, _number); + + if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY || + edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) ) + return edge->getTarget()->getBlock(); + + return root->getBlock(); +} + +// ---------------------------------------------------------------------------- +// PathProfileInfo implementation +// + +// Pass identification +char llvm::PathProfileInfo::ID = 0; + +PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) { +} + +PathProfileInfo::~PathProfileInfo() { + if (_currentDag) + delete _currentDag; +} + +// set the function for which paths are currently begin processed +void PathProfileInfo::setCurrentFunction(Function* F) { + // Make sure it exists + if (!F) return; + + if (_currentDag) + delete _currentDag; + + _currentFunction = F; + _currentDag = new BallLarusDag(*F); + _currentDag->init(); + _currentDag->calculatePathNumbers(); +} + +// get the function for which paths are currently being processed +Function* 
PathProfileInfo::getCurrentFunction() const { + return _currentFunction; +} + +// get the entry block of the function +BasicBlock* PathProfileInfo::getCurrentFunctionEntry() { + return _currentDag->getRoot()->getBlock(); +} + +// return the path based on its number +ProfilePath* PathProfileInfo::getPath(unsigned int number) { + return _functionPaths[_currentFunction][number]; +} + +// return the number of paths which a function may potentially execute +unsigned int PathProfileInfo::getPotentialPathCount() { + return _currentDag ? _currentDag->getNumberOfPaths() : 0; +} + +// return an iterator for the beginning of a functions executed paths +ProfilePathIterator PathProfileInfo::pathBegin() { + return _functionPaths[_currentFunction].begin(); +} + +// return an iterator for the end of a functions executed paths +ProfilePathIterator PathProfileInfo::pathEnd() { + return _functionPaths[_currentFunction].end(); +} + +// returns the total number of paths run in the function +unsigned int PathProfileInfo::pathsRun() { + return _currentFunction ? _functionPaths[_currentFunction].size() : 0; +} + +// ---------------------------------------------------------------------------- +// PathLoader implementation +// + +// remove all generated paths +PathProfileLoaderPass::~PathProfileLoaderPass() { + for( FunctionPathIterator funcNext = _functionPaths.begin(), + funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++) + for( ProfilePathIterator pathNext = funcNext->second.begin(), + pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++) + delete pathNext->second; +} + +// entry point of the pass; this loads and parses a file +bool PathProfileLoaderPass::runOnModule(Module &M) { + // get the filename and setup the module's function references + _filename = PathProfileInfoFilename; + buildFunctionRefs (M); + + if (!(_file = fopen(_filename.c_str(), "rb"))) { + errs () << "error: input '" << _filename << "' file does not exist.\n"; + return false; + } + + ProfilingType profType; + + while( fread(&profType, sizeof(ProfilingType), 1, _file) ) { + switch (profType) { + case ArgumentInfo: + handleArgumentInfo (); + break; + case PathInfo: + handlePathInfo (); + break; + default: + errs () << "error: bad path profiling file syntax, " << profType << "\n"; + fclose (_file); + return false; + } + } + + fclose (_file); + + return true; +} + +// create a reference table for functions defined in the path profile file +void PathProfileLoaderPass::buildFunctionRefs (Module &M) { + _functions.push_back(0); // make the 0 index a null pointer + + for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) { + if (F->isDeclaration()) + continue; + _functions.push_back(F); + } +} + +// handle command like argument infor in the output file +void PathProfileLoaderPass::handleArgumentInfo() { + // get the argument list's length + unsigned savedArgsLength; + if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) { + errs() << "warning: argument info header/data mismatch\n"; + return; + } + + // allocate a buffer, and get the arguments + char* args = new char[savedArgsLength+1]; + if( fread(args, 1, savedArgsLength, _file) != savedArgsLength ) + errs() << "warning: argument info header/data mismatch\n"; + + args[savedArgsLength] = '\0'; + argList = std::string(args); + delete [] args; // cleanup dynamic string + + // byte alignment + if (savedArgsLength & 3) + fseek(_file, 4-(savedArgsLength&3), SEEK_CUR); +} + +// Handle path profile information in the output file +void 
PathProfileLoaderPass::handlePathInfo () { + // get the number of functions in this profile + unsigned functionCount; + if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) { + errs() << "warning: path info header/data mismatch\n"; + return; + } + + // gather path information for each function + for (unsigned i = 0; i < functionCount; i++) { + PathProfileHeader pathHeader; + if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) { + errs() << "warning: bad header for path function info\n"; + break; + } + + Function* f = _functions[pathHeader.fnNumber]; + + // dynamically allocate a table to store path numbers + PathProfileTableEntry* pathTable = + new PathProfileTableEntry[pathHeader.numEntries]; + + if( fread(pathTable, sizeof(PathProfileTableEntry), + pathHeader.numEntries, _file) != pathHeader.numEntries) { + delete [] pathTable; + errs() << "warning: path function info header/data mismatch\n"; + return; + } + + // Build a new path for the current function + unsigned int totalPaths = 0; + for (unsigned int j = 0; j < pathHeader.numEntries; j++) { + totalPaths += pathTable[j].pathCounter; + _functionPaths[f][pathTable[j].pathNumber] + = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter, + 0, this); + } + + _functionPathCounts[f] = totalPaths; + + delete [] pathTable; + } +} + +//===----------------------------------------------------------------------===// +// NoProfile PathProfileInfo implementation +// + +namespace { + struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo { + static char ID; // Class identification, replacement for typeinfo + NoPathProfileInfo() : ImmutablePass(ID) { + initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &PathProfileInfo::ID) + return (PathProfileInfo*)this; + return this; + } + + virtual const char *getPassName() const { + return "NoPathProfileInfo"; + } + }; +} // End of anonymous namespace + +char NoPathProfileInfo::ID = 0; +// Register this pass... +INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile", + "No Path Profile Information", false, true, true) + +ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp new file mode 100644 index 0000000..0ae734e --- /dev/null +++ b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp @@ -0,0 +1,207 @@ +//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This verifier derives an edge profile file from current path profile +// information +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "path-profile-verifier" + +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include <stdio.h> + +using namespace llvm; + +namespace { + class PathProfileVerifier : public ModulePass { + private: + bool runOnModule(Module &M); + + public: + static char ID; // Pass identification, replacement for typeid + PathProfileVerifier() : ModulePass(ID) { + initializePathProfileVerifierPass(*PassRegistry::getPassRegistry()); + } + + + virtual const char *getPassName() const { + return "Path Profiler Verifier"; + } + + // The verifier requires the path profile and edge profile. + virtual void getAnalysisUsage(AnalysisUsage& AU) const; + }; +} + +static cl::opt<std::string> +EdgeProfileFilename("path-profile-verifier-file", + cl::init("edgefrompath.llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Edge profile file generated by -path-profile-verifier"), + cl::Hidden); + +char PathProfileVerifier::ID = 0; +INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier", + "Compare the path profile derived edge profile against the " + "edge profile.", true, true) + +ModulePass *llvm::createPathProfileVerifierPass() { + return new PathProfileVerifier(); +} + +// The verifier requires the path profile and edge profile. +void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const { + AU.addRequired<PathProfileInfo>(); + AU.addPreserved<PathProfileInfo>(); +} + +typedef std::map<unsigned, unsigned> DuplicateToIndexMap; +typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap; +typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap; + +// the verifier iterates through each path to gather the total +// number of edge frequencies +bool PathProfileVerifier::runOnModule (Module &M) { + PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>(); + + // setup a data structure to map path edges which index an + // array of edge counters + NestedBlockToIndexMap arrayMap; + unsigned i = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + + arrayMap[0][F->begin()][0] = i++; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + + unsigned duplicate = 0; + BasicBlock* prev = 0; + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; + prev = TI->getSuccessor(s), ++s) { + if (prev == TI->getSuccessor(s)) + duplicate++; + else duplicate = 0; + + arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++; + } + } + } + + std::vector<unsigned> edgeArray(i); + + // iterate through each path and increment the edge counters as needed + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + + pathProfileInfo.setCurrentFunction(F); + + DEBUG(dbgs() << "function '" << F->getName() << "' ran " + << pathProfileInfo.pathsRun() + << "/" << pathProfileInfo.getPotentialPathCount() + << " potential paths\n"); + + for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(), + endPath = 
pathProfileInfo.pathEnd(); + nextPath != endPath; nextPath++ ) { + ProfilePath* currentPath = nextPath->second; + + ProfilePathEdgeVector* pev = currentPath->getPathEdges(); + DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " + << currentPath->getCount() << "\n"); + // setup the entry edge (normally path profiling doesn't care about this) + if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) + edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + += currentPath->getCount(); + + for( ProfilePathEdgeIterator nextEdge = pev->begin(), + endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) { + if (nextEdge != pev->begin()) + DEBUG(dbgs() << " :: "); + + BasicBlock* source = nextEdge->getSource(); + BasicBlock* target = nextEdge->getTarget(); + unsigned duplicateNumber = nextEdge->getDuplicateNumber(); + DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber + << "}--> " << target->getNameStr()); + + // Ensure all the referenced edges exist + // TODO: make this a separate function + if( !arrayMap.count(source) ) { + errs() << " error [" << F->getNameStr() << "()]: source '" + << source->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source].count(target) ) { + errs() << " error [" << F->getNameStr() << "()]: target '" + << target->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source][target].count(duplicateNumber) ) { + errs() << " error [" << F->getNameStr() << "()]: edge " + << source->getNameStr() << " -> " << target->getNameStr() + << " duplicate number " << duplicateNumber + << " does not exist in the array map.\n"; + } else { + edgeArray[arrayMap[source][target][duplicateNumber]] + += currentPath->getCount(); + } + } + + DEBUG(errs() << "\n"); + + delete pev; + } + } + + std::string errorInfo; + std::string filename = EdgeProfileFilename; + + // Open a handle to the file + FILE* edgeFile = fopen(filename.c_str(),"wb"); + + if (!edgeFile) { + errs() << "error: unable to open file '" << filename << "' for output.\n"; + return false; + } + + errs() << "Generating edge profile '" << filename << "' ...\n"; + + // write argument info + unsigned type = ArgumentInfo; + unsigned num = pathProfileInfo.argList.size(); + int zeros = 0; + + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile); + if (num&3) + fwrite(&zeros, 1, 4-(num&3), edgeFile); + + type = EdgeInfo; + num = edgeArray.size(); + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + + // write each edge to the file + for( std::vector<unsigned>::iterator s = edgeArray.begin(), + e = edgeArray.end(); s != e; s++) + fwrite(&*s, sizeof (unsigned), 1, edgeFile); + + fclose (edgeFile); + + return true; +} diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp new file mode 100644 index 0000000..6ed2729 --- /dev/null +++ b/contrib/llvm/lib/Analysis/PostDominators.cpp @@ -0,0 +1,51 @@ +//===- PostDominators.cpp - Post-Dominator Calculation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the post-dominator construction algorithms. 
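+//
+// The actual computation is performed by the generic dominator-tree
+// machinery from DominatorInternals.h run over the inverse CFG; this file is
+// essentially a thin FunctionPass wrapper that triggers the recalculation.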
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "postdomtree" + +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Instructions.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Analysis/DominatorInternals.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// PostDominatorTree Implementation +//===----------------------------------------------------------------------===// + +char PostDominatorTree::ID = 0; +INITIALIZE_PASS(PostDominatorTree, "postdomtree", + "Post-Dominator Tree Construction", true, true) + +bool PostDominatorTree::runOnFunction(Function &F) { + DT->recalculate(F); + return false; +} + +PostDominatorTree::~PostDominatorTree() { + delete DT; +} + +void PostDominatorTree::print(raw_ostream &OS, const Module *) const { + DT->print(OS); +} + + +FunctionPass* llvm::createPostDomTree() { + return new PostDominatorTree(); +} + diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp new file mode 100644 index 0000000..b594e2b --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp @@ -0,0 +1,426 @@ +//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// estimates the profiling information in a very crude and unimaginative way. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-estimator" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +using namespace llvm; + +static cl::opt<double> +LoopWeight( + "profile-estimator-loop-weight", cl::init(10), + cl::value_desc("loop-weight"), + cl::desc("Number of loop executions used for profile-estimator") +); + +namespace { + class ProfileEstimatorPass : public FunctionPass, public ProfileInfo { + double ExecCount; + LoopInfo *LI; + std::set<BasicBlock*> BBToVisit; + std::map<Loop*,double> LoopExitWeights; + std::map<Edge,double> MinimalWeight; + public: + static char ID; // Class identification, replacement for typeinfo + explicit ProfileEstimatorPass(const double execcount = 0) + : FunctionPass(ID), ExecCount(execcount) { + initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry()); + if (execcount == 0) ExecCount = LoopWeight; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<LoopInfo>(); + } + + virtual const char *getPassName() const { + return "Profiling information estimator"; + } + + /// run - Estimate the profile information from the specified file. + virtual bool runOnFunction(Function &F); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. 
If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + virtual void recurseBasicBlock(BasicBlock *BB); + + void inline printEdgeWeight(Edge); + }; +} // End of anonymous namespace + +char ProfileEstimatorPass::ID = 0; +INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator", + "Estimate profiling information", false, true, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator", + "Estimate profiling information", false, true, false) + +namespace llvm { + char &ProfileEstimatorPassID = ProfileEstimatorPass::ID; + + FunctionPass *createProfileEstimatorPass() { + return new ProfileEstimatorPass(); + } + + /// createProfileEstimatorPass - This function returns a Pass that estimates + /// profiling information using the given loop execution count. + Pass *createProfileEstimatorPass(const unsigned execcount) { + return new ProfileEstimatorPass(execcount); + } +} + +static double ignoreMissing(double w) { + if (w == ProfileInfo::MissingValue) return 0; + return w; +} + +static void inline printEdgeError(ProfileInfo::Edge e, const char *M) { + DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n"); +} + +void inline ProfileEstimatorPass::printEdgeWeight(Edge E) { + DEBUG(dbgs() << "-- Weight of Edge " << E << ":" + << format("%20.20g", getEdgeWeight(E)) << "\n"); +} + +// recurseBasicBlock() - This calculates the ProfileInfo estimation for a +// single block and then recurses into the successors. +// The algorithm preserves the flow condition, meaning that the sum of the +// weight of the incoming edges must be equal the block weight which must in +// turn be equal to the sume of the weights of the outgoing edges. +// Since the flow of an block is deterimined from the current state of the +// flow, once an edge has a flow assigned this flow is never changed again, +// otherwise it would be possible to violate the flow condition in another +// block. +void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { + + // Break the recursion if this BasicBlock was already visited. + if (BBToVisit.find(BB) == BBToVisit.end()) return; + + // Read the LoopInfo for this block. + bool BBisHeader = LI->isLoopHeader(BB); + Loop* BBLoop = LI->getLoopFor(BB); + + // To get the block weight, read all incoming edges. + double BBWeight = 0; + std::set<BasicBlock*> ProcessedPreds; + for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + // If this block was not considered already, add weight. + Edge edge = getEdge(*bbi,BB); + double w = getEdgeWeight(edge); + if (ProcessedPreds.insert(*bbi).second) { + BBWeight += ignoreMissing(w); + } + // If this block is a loop header and the predecessor is contained in this + // loop, thus the edge is a backedge, continue and do not check if the + // value is valid. + if (BBisHeader && BBLoop->contains(*bbi)) { + printEdgeError(edge, "but is backedge, continuing"); + continue; + } + // If the edges value is missing (and this is no loop header, and this is + // no backedge) return, this block is currently non estimatable. 
+ if (w == MissingValue) { + printEdgeError(edge, "returning"); + return; + } + } + if (getExecutionCount(BB) != MissingValue) { + BBWeight = getExecutionCount(BB); + } + + // Fetch all necessary information for current block. + SmallVector<Edge, 8> ExitEdges; + SmallVector<Edge, 8> Edges; + if (BBLoop) { + BBLoop->getExitEdges(ExitEdges); + } + + // If this is a loop header, consider the following: + // Exactly the flow that is entering this block, must exit this block too. So + // do the following: + // *) get all the exit edges, read the flow that is already leaving this + // loop, remember the edges that do not have any flow on them right now. + // (The edges that have already flow on them are most likely exiting edges of + // other loops, do not touch those flows because the previously caclulated + // loopheaders would not be exact anymore.) + // *) In case there is not a single exiting edge left, create one at the loop + // latch to prevent the flow from building up in the loop. + // *) Take the flow that is not leaving the loop already and distribute it on + // the remaining exiting edges. + // (This ensures that all flow that enters the loop also leaves it.) + // *) Increase the flow into the loop by increasing the weight of this block. + // There is at least one incoming backedge that will bring us this flow later + // on. (So that the flow condition in this node is valid again.) + if (BBisHeader) { + double incoming = BBWeight; + // Subtract the flow leaving the loop. + std::set<Edge> ProcessedExits; + for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(), + ee = ExitEdges.end(); ei != ee; ++ei) { + if (ProcessedExits.insert(*ei).second) { + double w = getEdgeWeight(*ei); + if (w == MissingValue) { + Edges.push_back(*ei); + // Check if there is a necessary minimal weight, if yes, subtract it + // from weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + incoming -= MinimalWeight[*ei]; + DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } + } else { + incoming -= w; + } + } + } + // If no exit edges, create one: + if (Edges.size() == 0) { + BasicBlock *Latch = BBLoop->getLoopLatch(); + if (Latch) { + Edge edge = getEdge(Latch,0); + EdgeInformation[BB->getParent()][edge] = BBWeight; + printEdgeWeight(edge); + edge = getEdge(Latch, BB); + EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount; + printEdgeWeight(edge); + } + } + + // Distribute remaining weight to the exting edges. To prevent fractions + // from building up and provoking precision problems the weight which is to + // be distributed is split and the rounded, the last edge gets a somewhat + // bigger value, but we are close enough for an estimation. + double fraction = floor(incoming/Edges.size()); + for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); + ei != ee; ++ei) { + double w = 0; + if (ei != (ee-1)) { + w = fraction; + incoming -= fraction; + } else { + w = incoming; + } + EdgeInformation[BB->getParent()][*ei] += w; + // Read necessary minimal weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } + printEdgeWeight(*ei); + + // Add minimal weight to paths to all exit edges, this is used to ensure + // that enough flow is reaching this edges. 
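+        // GetPath() returns the block that was reached and fills 'p' with a
+        // child-to-parent map; walking that map from the exit edge's source
+        // back to this loop header reserves 'w' units of flow on every edge
+        // of the path via MinimalWeight. The reservation is subtracted before
+        // a block's weight is distributed and re-added once the edge gets its
+        // final weight.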
+ Path p; + const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest); + while (Dest != BB) { + const BasicBlock *Parent = p.find(Dest)->second; + Edge e = getEdge(Parent, Dest); + if (MinimalWeight.find(e) == MinimalWeight.end()) { + MinimalWeight[e] = 0; + } + MinimalWeight[e] += w; + DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n"); + Dest = Parent; + } + } + // Increase flow into the loop. + BBWeight *= (ExecCount+1); + } + + BlockInformation[BB->getParent()][BB] = BBWeight; + // Up until now we considered only the loop exiting edges, now we have a + // definite block weight and must distribute this onto the outgoing edges. + // Since there may be already flow attached to some of the edges, read this + // flow first and remember the edges that have still now flow attached. + Edges.clear(); + std::set<BasicBlock*> ProcessedSuccs; + + succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // Also check for (BB,0) edges that may already contain some flow. (But only + // in case there are no successors.) + if (bbi == bbe) { + Edge edge = getEdge(BB,0); + EdgeInformation[BB->getParent()][edge] = BBWeight; + printEdgeWeight(edge); + } + for ( ; bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + Edge edge = getEdge(BB,*bbi); + double w = getEdgeWeight(edge); + if (w != MissingValue) { + BBWeight -= getEdgeWeight(edge); + } else { + Edges.push_back(edge); + // If minimal weight is necessary, reserve weight by subtracting weight + // from block weight, this is readded later on. + if (MinimalWeight.find(edge) != MinimalWeight.end()) { + BBWeight -= MinimalWeight[edge]; + DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n"); + } + } + } + } + + double fraction = floor(BBWeight/Edges.size()); + // Finally we know what flow is still not leaving the block, distribute this + // flow onto the empty edges. + for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); + ei != ee; ++ei) { + if (ei != (ee-1)) { + EdgeInformation[BB->getParent()][*ei] += fraction; + BBWeight -= fraction; + } else { + EdgeInformation[BB->getParent()][*ei] += BBWeight; + } + // Readd minial necessary weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } + printEdgeWeight(*ei); + } + + // This block is visited, mark this before the recursion. + BBToVisit.erase(BB); + + // Recurse into successors. + for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } +} + +bool ProfileEstimatorPass::runOnFunction(Function &F) { + if (F.isDeclaration()) return false; + + // Fetch LoopInfo and clear ProfileInfo for this function. + LI = &getAnalysis<LoopInfo>(); + FunctionInformation.erase(&F); + BlockInformation[&F].clear(); + EdgeInformation[&F].clear(); + BBToVisit.clear(); + + // Mark all blocks as to visit. + for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) + BBToVisit.insert(bi); + + // Clear Minimal Edges. + MinimalWeight.clear(); + + DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n"); + + // Since the entry block is the first one and has no predecessors, the edge + // (0,entry) is inserted with the starting weight of 1. 
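+  // Note that the seed actually used below is 2^32 rather than 1; both the
+  // entry block and the virtual edge (0,entry) receive this value, and the
+  // other estimates in the function are derived from it.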
+ BasicBlock *entry = &F.getEntryBlock(); + BlockInformation[&F][entry] = pow(2.0, 32.0); + Edge edge = getEdge(0,entry); + EdgeInformation[&F][edge] = BlockInformation[&F][entry]; + printEdgeWeight(edge); + + // Since recurseBasicBlock() maybe returns with a block which was not fully + // estimated, use recurseBasicBlock() until everything is calculated. + bool cleanup = false; + recurseBasicBlock(entry); + while (BBToVisit.size() > 0 && !cleanup) { + // Remember number of open blocks, this is later used to check if progress + // was made. + unsigned size = BBToVisit.size(); + + // Try to calculate all blocks in turn. + for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(), + be = BBToVisit.end(); bi != be; ++bi) { + recurseBasicBlock(*bi); + // If at least one block was finished, break because iterator may be + // invalid. + if (BBToVisit.size() < size) break; + } + + // If there was not a single block resolved, make some assumptions. + if (BBToVisit.size() == size) { + bool found = false; + for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end(); + (BBI != BBE) && (!found); ++BBI) { + BasicBlock *BB = *BBI; + // Try each predecessor if it can be assumend. + for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + (bbi != bbe) && (!found); ++bbi) { + Edge e = getEdge(*bbi,BB); + double w = getEdgeWeight(e); + // Check that edge from predecessor is still free. + if (w == MissingValue) { + // Check if there is a circle from this block to predecessor. + Path P; + const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest); + if (Dest != *bbi) { + // If there is no circle, just set edge weight to 0 + EdgeInformation[&F][e] = 0; + DEBUG(dbgs() << "Assuming edge weight: "); + printEdgeWeight(e); + found = true; + } + } + } + } + if (!found) { + cleanup = true; + DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n"); + } + } + } + // In case there was no safe way to assume edges, set as a last measure, + // set _everything_ to zero. + if (cleanup) { + FunctionInformation[&F] = 0; + BlockInformation[&F].clear(); + EdgeInformation[&F].clear(); + for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + const BasicBlock *BB = &(*FI); + BlockInformation[&F][BB] = 0; + const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB); + if (predi == prede) { + Edge e = getEdge(0,BB); + setEdgeWeight(e,0); + } + for (;predi != prede; ++predi) { + Edge e = getEdge(*predi,BB); + setEdgeWeight(e,0); + } + succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB); + if (succi == succe) { + Edge e = getEdge(BB,0); + setEdgeWeight(e,0); + } + for (;succi != succe; ++succi) { + Edge e = getEdge(*succi,BB); + setEdgeWeight(e,0); + } + } + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/ProfileInfo.cpp b/contrib/llvm/lib/Analysis/ProfileInfo.cpp new file mode 100644 index 0000000..173de2c --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileInfo.cpp @@ -0,0 +1,1105 @@ +//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the abstract ProfileInfo interface, and the default +// "no profile" implementation. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-info" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/SmallSet.h" +#include <set> +#include <queue> +#include <limits> +using namespace llvm; + +namespace llvm { + template<> char ProfileInfoT<Function,BasicBlock>::ID = 0; +} + +// Register the ProfileInfo interface, providing a nice name to refer to. +INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo) + +namespace llvm { + +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {} +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {} + +template <> +ProfileInfoT<Function, BasicBlock>::ProfileInfoT() { + MachineProfile = 0; +} +template <> +ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { + if (MachineProfile) delete MachineProfile; +} + +template<> +char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; + +template<> +const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; + +template<> const +double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; + +template<> double +ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(BB); + if (I != J->second.end()) + return I->second; + } + + double Count = MissingValue; + + const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + + // Are there zero predecessors of this block? + if (PI == PE) { + Edge e = getEdge(0, BB); + Count = getEdgeWeight(e); + } else { + // Otherwise, if there are predecessors, the execution count of this block is + // the sum of the edge frequencies from the incoming edges. + std::set<const BasicBlock*> ProcessedPreds; + Count = 0; + for (; PI != PE; ++PI) { + const BasicBlock *P = *PI; + if (ProcessedPreds.insert(P).second) { + double w = getEdgeWeight(getEdge(P, BB)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } + } + } + + // If the predecessors did not suffice to get block weight, try successors. + if (Count == MissingValue) { + + succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + + // Are there zero successors of this block? 
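+    // A null block in an Edge stands for the virtual entry or exit of the
+    // function: (0,BB) feeds the entry block and (BB,0) drains a block with
+    // no successors, so such blocks read their count from the virtual exit
+    // edge instead.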
+ if (SI == SE) { + Edge e = getEdge(BB,0); + Count = getEdgeWeight(e); + } else { + std::set<const BasicBlock*> ProcessedSuccs; + Count = 0; + for (; SI != SE; ++SI) + if (ProcessedSuccs.insert(*SI).second) { + double w = getEdgeWeight(getEdge(BB, *SI)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } + } + } + + if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; + return Count; +} + +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineBasicBlock *MBB) { + std::map<const MachineFunction*, BlockCounts>::iterator J = + BlockInformation.find(MBB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(MBB); + if (I != J->second.end()) + return I->second; + } + + return MissingValue; +} + +template<> +double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) { + std::map<const Function*, double>::iterator J = + FunctionInformation.find(F); + if (J != FunctionInformation.end()) + return J->second; + + // isDeclaration() is checked here and not at start of function to allow + // functions without a body still to have a execution count. + if (F->isDeclaration()) return MissingValue; + + double Count = getExecutionCount(&F->getEntryBlock()); + if (Count != MissingValue) FunctionInformation[F] = Count; + return Count; +} + +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineFunction *MF) { + std::map<const MachineFunction*, double>::iterator J = + FunctionInformation.find(MF); + if (J != FunctionInformation.end()) + return J->second; + + double Count = getExecutionCount(&MF->front()); + if (Count != MissingValue) FunctionInformation[MF] = Count; + return Count; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + setExecutionCount(const BasicBlock *BB, double w) { + DEBUG(dbgs() << "Creating Block " << BB->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[BB->getParent()][BB] = w; +} + +template<> +void ProfileInfoT<MachineFunction, MachineBasicBlock>:: + setExecutionCount(const MachineBasicBlock *MBB, double w) { + DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[MBB->getParent()][MBB] = w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) { + double oldw = getEdgeWeight(e); + assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); + DEBUG(dbgs() << "Adding to Edge " << e + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + EdgeInformation[getFunction(e)][e] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + addExecutionCount(const BasicBlock *BB, double w) { + double oldw = getExecutionCount(BB); + assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); + DEBUG(dbgs() << "Adding to Block " << BB->getName() + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + BlockInformation[BB->getParent()][BB] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J == BlockInformation.end()) return; + + DEBUG(dbgs() << "Deleting " << BB->getName() << "\n"); + J->second.erase(BB); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) { + std::map<const Function*, 
EdgeWeights>::iterator J = + EdgeInformation.find(getFunction(e)); + if (J == EdgeInformation.end()) return; + + DEBUG(dbgs() << "Deleting" << e << "\n"); + J->second.erase(e); +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceEdge(const Edge &oldedge, const Edge &newedge) { + double w; + if ((w = getEdgeWeight(newedge)) == MissingValue) { + w = getEdgeWeight(oldedge); + DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n"); + } else { + w += getEdgeWeight(oldedge); + DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n"); + } + setEdgeWeight(newedge,w); + removeEdge(oldedge); +} + +template<> +const BasicBlock *ProfileInfoT<Function,BasicBlock>:: + GetPath(const BasicBlock *Src, const BasicBlock *Dest, + Path &P, unsigned Mode) { + const BasicBlock *BB = 0; + bool hasFoundPath = false; + + std::queue<const BasicBlock *> BFS; + BFS.push(Src); + + while(BFS.size() && !hasFoundPath) { + BB = BFS.front(); + BFS.pop(); + + succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + if (Succ == End) { + P[0] = BB; + if (Mode & GetPathToExit) { + hasFoundPath = true; + BB = 0; + } + } + for(;Succ != End; ++Succ) { + if (P.find(*Succ) != P.end()) continue; + Edge e = getEdge(BB,*Succ); + if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue; + P[*Succ] = BB; + BFS.push(*Succ); + if ((Mode & GetPathToDest) && *Succ == Dest) { + hasFoundPath = true; + BB = *Succ; + break; + } + if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) { + hasFoundPath = true; + BB = *Succ; + break; + } + } + } + + return BB; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + divertFlow(const Edge &oldedge, const Edge &newedge) { + DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge ); + + // First check if the old edge was taken, if not, just delete it... + if (getEdgeWeight(oldedge) == 0) { + removeEdge(oldedge); + return; + } + + Path P; + P[newedge.first] = 0; + P[newedge.second] = newedge.first; + const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest); + + double w = getEdgeWeight (oldedge); + DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n"); + do { + const BasicBlock *Parent = P.find(BB)->second; + Edge e = getEdge(Parent,BB); + double oldw = getEdgeWeight(e); + double oldc = getExecutionCount(e.first); + setEdgeWeight(e, w+oldw); + if (Parent != oldedge.first) { + setExecutionCount(e.first, w+oldc); + } + BB = Parent; + } while (BB != newedge.first); + removeEdge(oldedge); +} + +/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB. +/// This checks all edges of the function the blocks reside in and replaces the +/// occurrences of RmBB with DestBB. 
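+/// Edges whose endpoints both collapse onto DestBB are dropped; if more than
+/// one such edge shows up, the later ones are folded into the self edge
+/// (DestBB,DestBB) so their weight is preserved.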
+template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { + DEBUG(dbgs() << "Replacing " << RmBB->getName() + << " with " << DestBB->getName() << "\n"); + const Function *F = DestBB->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + Edge e, newedge; + bool erasededge = false; + EdgeWeights::iterator I = J->second.begin(), E = J->second.end(); + while(I != E) { + e = (I++)->first; + bool foundedge = false; bool eraseedge = false; + if (e.first == RmBB) { + if (e.second == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(DestBB, e.second); + foundedge = true; + } + } + if (e.second == RmBB) { + if (e.first == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(e.first, DestBB); + foundedge = true; + } + } + if (foundedge) { + replaceEdge(e, newedge); + } + if (eraseedge) { + if (erasededge) { + Edge newedge = getEdge(DestBB, DestBB); + replaceEdge(e, newedge); + } else { + removeEdge(e); + erasededge = true; + } + } + } +} + +/// Splits an edge in the ProfileInfo and redirects flow over NewBB. +/// Since its possible that there is more than one edge in the CFG from FristBB +/// to SecondBB its necessary to redirect the flow proporionally. +template<> +void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB, + const BasicBlock *SecondBB, + const BasicBlock *NewBB, + bool MergeIdenticalEdges) { + const Function *F = FirstBB->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + // Generate edges and read current weight. + Edge e = getEdge(FirstBB, SecondBB); + Edge n1 = getEdge(FirstBB, NewBB); + Edge n2 = getEdge(NewBB, SecondBB); + EdgeWeights &ECs = J->second; + double w = ECs[e]; + + int succ_count = 0; + if (!MergeIdenticalEdges) { + // First count the edges from FristBB to SecondBB, if there is more than + // one, only slice out a proporional part for NewBB. + for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB); + BBI != BBE; ++BBI) { + if (*BBI == SecondBB) succ_count++; + } + // When the NewBB is completely new, increment the count by one so that + // the counts are properly distributed. + if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++; + } else { + // When the edges are merged anyway, then redirect all flow. + succ_count = 1; + } + + // We know now how many edges there are from FirstBB to SecondBB, reroute a + // proportional part of the edge weight over NewBB. 
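+  // Illustrative numbers only: with w == 9 and succ_count == 3, neww == 3
+  // units are routed FirstBB->NewBB->SecondBB and the original edge keeps the
+  // remaining 6; with succ_count == 1 the original edge disappears entirely.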
+ double neww = floor(w / succ_count); + ECs[n1] += neww; + ECs[n2] += neww; + BlockInformation[F][NewBB] += neww; + if (succ_count == 1) { + ECs.erase(e); + } else { + ECs[e] -= neww; + } +} + +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old, + const BasicBlock* New) { + const Function *F = Old->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n"); + + std::set<Edge> Edges; + for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end(); + ewi != ewe; ++ewi) { + Edge old = ewi->first; + if (old.first == Old) { + Edges.insert(old); + } + } + for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end(); + EI != EE; ++EI) { + Edge newedge = getEdge(New, EI->second); + replaceEdge(*EI, newedge); + } + + double w = getExecutionCount(Old); + setEdgeWeight(getEdge(Old, New), w); + setExecutionCount(New, w); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB, + const BasicBlock* NewBB, + BasicBlock *const *Preds, + unsigned NumPreds) { + const Function *F = BB->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName() + << " to " << NewBB->getName() << "\n"); + + // Collect weight that was redirected over NewBB. + double newweight = 0; + + std::set<const BasicBlock *> ProcessedPreds; + // For all requestes Predecessors. + for (unsigned pred = 0; pred < NumPreds; ++pred) { + const BasicBlock * Pred = Preds[pred]; + if (ProcessedPreds.insert(Pred).second) { + // Create edges and read old weight. + Edge oldedge = getEdge(Pred, BB); + Edge newedge = getEdge(Pred, NewBB); + + // Remember how much weight was redirected. 
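+      // replaceEdge() moves the weight of (Pred,BB) over to (Pred,NewBB); the
+      // accumulated 'newweight' afterwards becomes both the weight of the new
+      // edge (NewBB,BB) and the execution count of NewBB.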
+ newweight += getEdgeWeight(oldedge); + + replaceEdge(oldedge,newedge); + } + } + + Edge newedge = getEdge(NewBB,BB); + setEdgeWeight(newedge, newweight); + setExecutionCount(NewBB, newweight); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old, + const Function *New) { + DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with " + << New->getName() << "\n"); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(Old); + if(J != EdgeInformation.end()) { + EdgeInformation[New] = J->second; + } + EdgeInformation.erase(Old); + BlockInformation.erase(Old); + FunctionInformation.erase(Old); +} + +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, + ProfileInfo::Edge &tocalc, unsigned &uncalc) { + if (w == ProfileInfo::MissingValue) { + tocalc = edge; + uncalc++; + return 0; + } else { + return w; + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>:: + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, + bool assumeEmptySelf) { + Edge edgetocalc; + unsigned uncalculated = 0; + + // collect weights of all incoming and outgoing edges, rememer edges that + // have no value + double incount = 0; + SmallSet<const BasicBlock*,8> pred_visited; + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi==bbe) { + Edge e = getEdge(0,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + for (;bbi != bbe; ++bbi) { + if (pred_visited.insert(*bbi)) { + Edge e = getEdge(*bbi,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + } + + double outcount = 0; + SmallSet<const BasicBlock*,8> succ_visited; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi==sbbe) { + Edge e = getEdge(BB,0); + if (getEdgeWeight(e) == MissingValue) { + double w = getExecutionCount(BB); + if (w != MissingValue) { + setEdgeWeight(e,w); + removed = e; + } + } + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + for (;sbbi != sbbe; ++sbbi) { + if (succ_visited.insert(*sbbi)) { + Edge e = getEdge(BB,*sbbi); + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + } + + // if exactly one edge weight was missing, calculate it and remove it from + // spanning tree + if (uncalculated == 0 ) { + return true; + } else + if (uncalculated == 1) { + if (incount < outcount) { + EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; + } else { + EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; + } + DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": " + << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); + removed = edgetocalc; + return true; + } else + if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { + setEdgeWeight(edgetocalc, incount * 10); + removed = edgetocalc; + return true; + } else { + return false; + } +} + +static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) { + double w = PI->getEdgeWeight(e); + if (w != ProfileInfo::MissingValue) { + calcw += w; + } else { + misscount.insert(e); + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { + double inWeight = 0; + std::set<Edge> inMissing; + std::set<const BasicBlock*> ProcessedPreds; + const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi == bbe) { + 
readEdge(this,getEdge(0,BB),inWeight,inMissing); + } + for( ; bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); + } + } + + double outWeight = 0; + std::set<Edge> outMissing; + std::set<const BasicBlock*> ProcessedSuccs; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi == sbbe) + readEdge(this,getEdge(BB,0),outWeight,outMissing); + for ( ; sbbi != sbbe; ++sbbi ) { + if (ProcessedSuccs.insert(*sbbi).second) { + readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); + } + } + + double share; + std::set<Edge>::iterator ei,ee; + if (inMissing.size() == 0 && outMissing.size() > 0) { + ei = outMissing.begin(); + ee = outMissing.end(); + share = inWeight/outMissing.size(); + setExecutionCount(BB,inWeight); + } else + if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) { + ei = inMissing.begin(); + ee = inMissing.end(); + share = 0; + setExecutionCount(BB,0); + } else + if (inMissing.size() == 0 && outMissing.size() == 0) { + setExecutionCount(BB,outWeight); + return true; + } else { + return false; + } + for ( ; ei != ee; ++ei ) { + setEdgeWeight(*ei,share); + } + return true; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { +// if (getExecutionCount(&(F->getEntryBlock())) == 0) { +// for (Function::const_iterator FI = F->begin(), FE = F->end(); +// FI != FE; ++FI) { +// const BasicBlock* BB = &(*FI); +// { +// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// { +// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// } +// return; +// } + // The set of BasicBlocks that are still unvisited. + std::set<const BasicBlock*> Unvisited; + + // The set of return edges (Edges with no successors). + std::set<Edge> ReturnEdges; + double ReturnWeight = 0; + + // First iterate over the whole function and collect: + // 1) The blocks in this function in the Unvisited set. + // 2) The return edges in the ReturnEdges set. + // 3) The flow that is leaving the function already via return edges. + + // Data structure for searching the function. + std::queue<const BasicBlock *> BFS; + const BasicBlock *BB = &(F->getEntryBlock()); + BFS.push(BB); + Unvisited.insert(BB); + + while (BFS.size()) { + BB = BFS.front(); BFS.pop(); + succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + if (NBB == End) { + Edge e = getEdge(BB,0); + double w = getEdgeWeight(e); + if (w == MissingValue) { + // If the return edge has no value, try to read value from block. + double bw = getExecutionCount(BB); + if (bw != MissingValue) { + setEdgeWeight(e,bw); + ReturnWeight += bw; + } else { + // If both return edge and block provide no value, collect edge. + ReturnEdges.insert(e); + } + } else { + // If the return edge has a proper value, collect it. + ReturnWeight += w; + } + } + for (;NBB != End; ++NBB) { + if (Unvisited.insert(*NBB).second) { + BFS.push(*NBB); + } + } + } + + while (Unvisited.size() > 0) { + unsigned oldUnvisitedCount = Unvisited.size(); + bool FoundPath = false; + + // If there is only one edge left, calculate it. 
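+    // Flow conservation over the whole function: whatever enters through the
+    // entry block must leave through some return edge, so the single missing
+    // return edge receives the entry count minus the flow already known to
+    // leave.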
+ if (ReturnEdges.size() == 1) { + ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight; + + Edge e = *ReturnEdges.begin(); + setEdgeWeight(e,ReturnWeight); + setExecutionCount(e.first,ReturnWeight); + + Unvisited.erase(e.first); + ReturnEdges.erase(e); + continue; + } + + // Calculate all blocks where only one edge is missing, this may also + // resolve furhter return edges. + std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + Edge e; + if(CalculateMissingEdge(BB,e,true)) { + if (BlockInformation[F].find(BB) == BlockInformation[F].end()) { + setExecutionCount(BB,getExecutionCount(BB)); + } + Unvisited.erase(BB); + if (e.first != 0 && e.second == 0) { + ReturnEdges.erase(e); + ReturnWeight += getEdgeWeight(e); + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Estimate edge weights by dividing the flow proportionally. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + bool AllEdgesHaveSameReturn = true; + // Check each Successor, these must all end up in the same or an empty + // return block otherwise its dangerous to do an estimation on them. + for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + Succ != End; ++Succ) { + Path P; + GetPath(*Succ, 0, P, GetPathToExit); + if (Dest && Dest != P[0]) { + AllEdgesHaveSameReturn = false; + } + Dest = P[0]; + } + if (AllEdgesHaveSameReturn) { + if(EstimateMissingEdges(BB)) { + Unvisited.erase(BB); + break; + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Check if there is a path to an block that has a known value and redirect + // flow accordingly. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + // Fetch path. + const BasicBlock *BB = *FI; ++FI; + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue); + + // Calculate incoming flow. + double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + invalid++; + } else { + // If the path contains the successor, this means its a backedge, + // do not count as missing. + if (P.find(*NBB) == P.end()) + inmissing++; + } + incount++; + } + } + if (inmissing == incount) continue; + if (invalid == 0) continue; + + // Subtract (already) outgoing flow. + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw -= ew; + } + } + } + if (iw < 0) continue; + + // Check the receiving end of the path if it can handle the flow. + double ow = getExecutionCount(Dest); + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + ow -= ew; + } + } + } + if (ow < 0) continue; + + // Determine how much flow shall be used. 
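+      // The flow pushed along the path is limited by both the unassigned
+      // incoming flow 'iw' at this block and the spare capacity 'ow' at the
+      // destination, i.e. effectively min(ew, iw, ow); if the last edge of
+      // the path has no weight yet and no incoming edge is missing, all of
+      // 'iw' is used.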
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest)); + if (ew != MissingValue) { + ew = ew<ow?ew:ow; + ew = ew<iw?ew:iw; + } else { + if (inmissing == 0) + ew = iw; + } + + // Create flow. + if (ew != MissingValue) { + do { + Edge e = getEdge(P[Dest],Dest); + if (getEdgeWeight(e) == MissingValue) { + setEdgeWeight(e,ew); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Calculate a block with self loop. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + bool SelfEdgeFound = false; + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (*NBB == BB) { + SelfEdgeFound = true; + break; + } + } + if (SelfEdgeFound) { + Edge e = getEdge(BB,BB); + if (getEdgeWeight(e) == MissingValue) { + double iw = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + setEdgeWeight(e,iw * 10); + FoundPath = true; + } + } + } + if (FoundPath) continue; + + // Determine backedges, set them to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + Path P; + bool BackEdgeFound = false; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); + if (Dest == *NBB) { + BackEdgeFound = true; + break; + } + } + if (BackEdgeFound) { + Edge e = getEdge(Dest,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Channel flow to return block. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); + Dest = P[0]; + if (!Dest) continue; + + if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { + // Calculate incoming flow. + double iw = 0; + std::set<const BasicBlock *> Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,iw); + FoundPath = true; + } else { + assert(0 && "Edge should not have value already!"); + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Speculatively set edges to zero. 
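+    // This is the last resort before giving up: zeroing a single unknown
+    // incoming edge lets the CalculateMissingEdge() pass at the top of the
+    // loop make progress again on the next iteration.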
+ FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Edge e = getEdge(*NBB,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + break; + } + } + } + if (FoundPath) continue; + + errs() << "{"; + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + dbgs() << BB->getName(); + if (FI != FE) + dbgs() << ","; + } + errs() << "}"; + + errs() << "ASSERT: could not repair function"; + assert(0 && "could not repair function"); + } + + EdgeWeights J = EdgeInformation[F]; + for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { + Edge e = EI->first; + + bool SuccFound = false; + if (e.first != 0) { + succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); + if (NBB == End) { + if (0 == e.second) { + SuccFound = true; + } + } + for (;NBB != End; ++NBB) { + if (*NBB == e.second) { + SuccFound = true; + break; + } + } + if (!SuccFound) { + removeEdge(e); + } + } + } +} + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { + return O << MF->getFunction()->getName() << "(MF)"; +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { + return O << MBB->getBasicBlock()->getName() << "(MB)"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +} // namespace llvm + +//===----------------------------------------------------------------------===// +// NoProfile ProfileInfo implementation +// + +namespace { + struct NoProfileInfo : public ImmutablePass, public ProfileInfo { + static char ID; // Class identification, replacement for typeinfo + NoProfileInfo() : ImmutablePass(ID) { + initializeNoProfileInfoPass(*PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + virtual const char *getPassName() const { + return "NoProfileInfo"; + } + }; +} // End of anonymous namespace + +char NoProfileInfo::ID = 0; +// Register this pass... 
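+// NoProfileInfo is the default implementation of the ProfileInfo analysis
+// group (see INITIALIZE_ANALYSIS_GROUP above); it carries no data of its own.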
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile", + "No Profile Information", false, true, true) + +ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp new file mode 100644 index 0000000..eaa38da --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp @@ -0,0 +1,157 @@ +//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileInfoLoader class is used to load and represent profiling +// information read in from the dump file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Module.h" +#include "llvm/InstrTypes.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +// ByteSwap - Byteswap 'Var' if 'Really' is true. +// +static inline unsigned ByteSwap(unsigned Var, bool Really) { + if (!Really) return Var; + return ((Var & (255U<< 0U)) << 24U) | + ((Var & (255U<< 8U)) << 8U) | + ((Var & (255U<<16U)) >> 8U) | + ((Var & (255U<<24U)) >> 24U); +} + +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + if (A == ProfileInfoLoader::Uncounted) return B; + if (B == ProfileInfoLoader::Uncounted) return A; + return A + B; +} + +static void ReadProfilingBlock(const char *ToolName, FILE *F, + bool ShouldByteSwap, + std::vector<unsigned> &Data) { + // Read the number of entries... + unsigned NumEntries; + if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { + errs() << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + NumEntries = ByteSwap(NumEntries, ShouldByteSwap); + + // Read the counts... + std::vector<unsigned> TempSpace(NumEntries); + + // Read in the block of data... + if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { + errs() << ToolName << ": data packet truncated!\n"; + perror(0); + exit(1); + } + + // Make sure we have enough space... The space is initialised to -1 to + // facitiltate the loading of missing values for OptimalEdgeProfiling. + if (Data.size() < NumEntries) + Data.resize(NumEntries, ProfileInfoLoader::Uncounted); + + // Accumulate the data we just read into the data. + if (!ShouldByteSwap) { + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(TempSpace[i], Data[i]); + } + } else { + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]); + } + } +} + +const unsigned ProfileInfoLoader::Uncounted = ~0U; + +// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the +// program if the file is invalid or broken. +// +ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, + const std::string &Filename, + Module &TheModule) : + Filename(Filename), + M(TheModule), Warned(false) { + FILE *F = fopen(Filename.c_str(), "rb"); + if (F == 0) { + errs() << ToolName << ": Error opening '" << Filename << "': "; + perror(0); + exit(1); + } + + // Keep reading packets until we run out of them. 
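+  // Every packet starts with a type word. The known packet types are small
+  // integers, so a type word whose low byte reads as zero (for example a
+  // value written as 0x00000001 on one byte order and read back as
+  // 0x01000000 on the other) signals an endianness mismatch, and each word
+  // of the file is byte-swapped as it is read.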
+ unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ByteSwap(PacketType, ShouldByteSwap); + + switch (PacketType) { + case ArgumentInfo: { + unsigned ArgLength; + if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + ArgLength = ByteSwap(ArgLength, ShouldByteSwap); + + // Read in the arguments... + std::vector<char> Chars(ArgLength+4); + + if (ArgLength) + if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength])); + break; + } + + case FunctionInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts); + break; + + case BlockInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + case OptEdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); + break; + + case BBTraceInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); + break; + + default: + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + exit(1); + } + } + + fclose(F); +} + diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp new file mode 100644 index 0000000..098079b --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -0,0 +1,267 @@ +//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// loads the information from a profile dump file. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-loader" +#include "llvm/BasicBlock.h" +#include "llvm/InstrTypes.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" +#include <set> +using namespace llvm; + +STATISTIC(NumEdgesRead, "The # of edges read."); + +static cl::opt<std::string> +ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), + cl::value_desc("filename"), + cl::desc("Profile file loaded by -profile-loader")); + +namespace { + class LoaderPass : public ModulePass, public ProfileInfo { + std::string Filename; + std::set<Edge> SpanningTree; + std::set<const BasicBlock*> BBisUnvisited; + unsigned ReadCount; + public: + static char ID; // Class identification, replacement for typeinfo + explicit LoaderPass(const std::string &filename = "") + : ModulePass(ID), Filename(filename) { + initializeLoaderPassPass(*PassRegistry::getPassRegistry()); + if (filename.empty()) Filename = ProfileInfoFilename; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual const char *getPassName() const { + return "Profiling information loader"; + } + + // recurseBasicBlock() - Calculates the edge weights for as much basic + // blocks as possbile. + virtual void recurseBasicBlock(const BasicBlock *BB); + virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &); + virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + /// run - Load the profile information from the specified file. + virtual bool runOnModule(Module &M); + }; +} // End of anonymous namespace + +char LoaderPass::ID = 0; +INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader", + "Load profile information from llvmprof.out", false, true, false) + +char &llvm::ProfileLoaderPassID = LoaderPass::ID; + +ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } + +/// createProfileLoaderPass - This function returns a Pass that loads the +/// profiling information for the module from the specified filename, making it +/// available to the optimizers. +Pass *llvm::createProfileLoaderPass(const std::string &Filename) { + return new LoaderPass(Filename); +} + +void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc, + unsigned &uncalc, double &count) { + double w; + if ((w = getEdgeWeight(edge)) == MissingValue) { + tocalc = edge; + uncalc++; + } else { + count+=w; + } +} + +// recurseBasicBlock - Visits all neighbours of a block and then tries to +// calculate the missing edge values. 
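+// Edges that were left uncounted by optimal edge profiling are collected in
+// SpanningTree; once all the other edges around a block are known,
+// CalculateMissingEdge() recovers the missing one from flow conservation and
+// it is removed from the set.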
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { + + // break recursion if already visited + if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return; + BBisUnvisited.erase(BB); + if (!BB) return; + + for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + + Edge tocalc; + if (CalculateMissingEdge(BB, tocalc)) { + SpanningTree.erase(tocalc); + } +} + +void LoaderPass::readEdge(ProfileInfo::Edge e, + std::vector<unsigned> &ECs) { + if (ReadCount < ECs.size()) { + double weight = ECs[ReadCount++]; + if (weight != ProfileInfoLoader::Uncounted) { + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also representable + // in double. + EdgeInformation[getFunction(e)][e] += (double)weight; + + DEBUG(dbgs() << "--Read Edge Counter for " << e + << " (# "<< (ReadCount-1) << "): " + << (unsigned)getEdgeWeight(e) << "\n"); + } else { + // This happens only if reading optimal profiling information, not when + // reading regular profiling information. + SpanningTree.insert(e); + } + } +} + +bool LoaderPass::runOnModule(Module &M) { + ProfileInfoLoader PIL("profile-loader", Filename, M); + + EdgeInformation.clear(); + std::vector<unsigned> Counters = PIL.getRawEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + Counters = PIL.getRawOptimalEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + readEdge(getEdge(BB,0), Counters); + } + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + while (SpanningTree.size() > 0) { + + unsigned size = SpanningTree.size(); + + BBisUnvisited.clear(); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + BBisUnvisited.insert(ei->first); + BBisUnvisited.insert(ei->second); + } + while (BBisUnvisited.size() > 0) { + recurseBasicBlock(*BBisUnvisited.begin()); + } + + if (SpanningTree.size() == size) { + DEBUG(dbgs()<<"{"); + for (std::set<Edge>::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + DEBUG(dbgs()<< *ei <<","); + } + assert(0 && "No edge calculated!"); + } + + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with 
" + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + BlockInformation.clear(); + Counters = PIL.getRawBlockCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + BlockInformation[F][BB] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + FunctionInformation.clear(); + Counters = PIL.getRawFunctionCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + FunctionInformation[F] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + return false; +} diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp new file mode 100644 index 0000000..a017518 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp @@ -0,0 +1,382 @@ +//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that checks profiling information for +// plausibility. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-verifier" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Debug.h" +#include <set> +using namespace llvm; + +static cl::opt<bool,false> +ProfileVerifierDisableAssertions("profile-verifier-noassert", + cl::desc("Disable assertions")); + +namespace llvm { + template<class FType, class BType> + class ProfileVerifierPassT : public FunctionPass { + + struct DetailedBlockInfo { + const BType *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; + }; + + ProfileInfoT<FType, BType> *PI; + std::set<const BType*> BBisVisited; + std::set<const FType*> FisVisited; + bool DisableAssertions; + + // When debugging is enabled, the verifier prints a whole slew of debug + // information, otherwise its just the assert. These are all the helper + // functions. 
+ bool PrintedDebugTree; + std::set<const BType*> BBisPrinted; + void debugEntry(DetailedBlockInfo*); + void printDebugInfo(const BType *BB); + + public: + static char ID; // Class identification, replacement for typeinfo + + explicit ProfileVerifierPassT () : FunctionPass(ID) { + initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); + DisableAssertions = ProfileVerifierDisableAssertions; + } + explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), + DisableAssertions(da) { + initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<ProfileInfoT<FType, BType> >(); + } + + const char *getPassName() const { + return "Profiling information verifier"; + } + + /// run - Verify the profile information. + bool runOnFunction(FType &F); + void recurseBasicBlock(const BType*); + + bool exitReachable(const FType*); + double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge); + void CheckValue(bool, const char*, DetailedBlockInfo*); + }; + + typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass; + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) { + + if (BBisPrinted.find(BB) != BBisPrinted.end()) return; + + double BBWeight = PI->getExecutionCount(BB); + if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; } + double inWeight = 0; + int inCount = 0; + std::set<const BType*> ProcessedPreds; + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + dbgs() << "calculated in-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + inWeight += EdgeWeight; + inCount++; + } + } + double outWeight = 0; + int outCount = 0; + std::set<const BType*> ProcessedSuccs; + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + dbgs() << "calculated out-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + outWeight += EdgeWeight; + outCount++; + } + } + dbgs() << "Block " << BB->getNameStr() << " in " + << BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",BBWeight) << "," + << "inWeight=" << format("%20.20g",inWeight) << "," + << "inCount=" << inCount << "," + << "outWeight=" << format("%20.20g",outWeight) << "," + << "outCount" << outCount << "\n"; + + // mark as visited and recurse into subnodes + BBisPrinted.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + printDebugInfo(*bbi); + } + } + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { + dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " + << DI->BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," + << "inWeight=" << format("%20.20g",DI->inWeight) << "," + << "inCount=" << DI->inCount << "," + << "outWeight=" << format("%20.20g",DI->outWeight) << "," + << "outCount=" 
<< DI->outCount << "\n"; + if (!PrintedDebugTree) { + PrintedDebugTree = true; + printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + } + } + + // This compares A and B for equality. + static bool Equals(double A, double B) { + return A == B; + } + + // This checks if the function "exit" is reachable from an given function + // via calls, this is necessary to check if a profile is valid despite the + // counts not fitting exactly. + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) { + if (!F) return false; + + if (FisVisited.count(F)) return false; + + FType *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } + + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { + FType *F = CI->getCalledFunction(); + if (F) { + exits |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + exits = true; + } + if (exits) break; + } + } + return exits; + } + + #define ASSERTMESSAGE(M) \ + { dbgs() << "ASSERT:" << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); } + + template<class FType, class BType> + double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has missing value"); + return 0; + } else { + if (EdgeWeight < 0) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has negative value"); + } + return EdgeWeight; + } + } + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, + const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + dbgs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; + } + + // This calculates the Information for a block and then recurses into the + // successors. + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = 0; + DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set<const BType*> ProcessedPreds; + const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); + DI.inCount++; + } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } + + // Read successors. + std::set<const BType*> ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). 
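The exitReachable helper above is a depth-first search over the call graph that stops at the function named "exit" and treats indirect calls conservatively as "might reach exit". A minimal standalone sketch of the same search over an invented string-keyed call graph (not LLVM's call-graph API):

#include <map>
#include <set>
#include <string>
#include <vector>

// An empty callee name stands in for an indirect call, which is treated
// conservatively as possibly reaching exit.
typedef std::map<std::string, std::vector<std::string> > CallGraph;

static bool exitReachable(const CallGraph &CG, const std::string &F,
                          std::set<std::string> &Visited) {
  if (F == "exit") return true;
  if (!Visited.insert(F).second) return false;   // already searched

  CallGraph::const_iterator It = CG.find(F);
  if (It == CG.end()) return false;
  for (size_t i = 0; i < It->second.size(); ++i) {
    const std::string &Callee = It->second[i];
    if (Callee.empty() || exitReachable(CG, Callee, Visited))
      return true;                               // indirect call, or exit found
  }
  return false;
}

int main() {
  CallGraph CG;
  CG["main"].push_back("helper");
  CG["helper"].push_back("exit");
  std::set<std::string> Visited;
  return exitReachable(CG, "main", Visited) ? 0 : 1;
}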
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0)); + if (w != ProfileInfoT<FType, BType>::MissingValue) { + DI.outWeight += w; + DI.outCount++; + } + for (;bbi != bbe; ++bbi) { + if (ProcessedSuccs.insert(*bbi).second) { + DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + DI.outCount++; + } + } + + // Read block weight. + DI.BBWeight = PI->getExecutionCount(BB); + CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue, + "BasicBlock has missing value", &DI); + CheckValue(DI.BBWeight < 0, + "BasicBlock has negative value", &DI); + + // Check if this block is a setjmp target. + bool isSetJmpTarget = false; + if (DI.outWeight > DI.inWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F && (F->getName() == "_setjmp")) { + isSetJmpTarget = true; break; + } + } + } + } + // Check if this block is eventually reaching exit. + bool isExitReachable = false; + if (DI.inWeight > DI.outWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F) { + FisVisited.clear(); + isExitReachable |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + isExitReachable = true; + } + if (isExitReachable) break; + } + } + } + + if (DI.inCount > 0 && DI.outCount == 0) { + // If this is a block with no successors. + if (!isSetJmpTarget) { + CheckValue(!Equals(DI.inWeight,DI.BBWeight), + "inWeight and BBWeight do not match", &DI); + } + } else if (DI.inCount == 0 && DI.outCount > 0) { + // If this is a block with no predecessors. + if (!isExitReachable) + CheckValue(!Equals(DI.BBWeight,DI.outWeight), + "BBWeight and outWeight do not match", &DI); + } else { + // If this block has successors and predecessors. + if (DI.inWeight > DI.outWeight && !isExitReachable) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + if (DI.inWeight < DI.outWeight && !isSetJmpTarget) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + } + + + // Mark this block as visited, rescurse into successors. + BBisVisited.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + recurseBasicBlock(*bbi); + } + } + + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) { + PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >(); + if (!PI) + ASSERTMESSAGE("No ProfileInfo available"); + + // Prepare global variables. + PrintedDebugTree = false; + BBisVisited.clear(); + + // Fetch entry block and recurse into it. 
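The checks above amount to a flow-conservation rule: the weight entering a block, the block's own execution count, and the weight leaving it must agree unless a setjmp target or a call that may reach exit explains the difference. A toy version of the core comparison, with invented names and no LLVM types:

#include <cstdio>

struct BlockCounts {
  double BBWeight, inWeight, outWeight;
  int inCount, outCount;
};

// Returns true if the counts are plausible for an ordinary block
// (one that is neither a setjmp target nor able to reach exit via a call).
static bool flowIsConsistent(const BlockCounts &DI) {
  if (DI.inCount > 0 && DI.outCount == 0)        // block with no successors
    return DI.inWeight == DI.BBWeight;
  if (DI.inCount == 0 && DI.outCount > 0)        // block with no predecessors
    return DI.BBWeight == DI.outWeight;
  return DI.inWeight == DI.outWeight;            // interior block
}

int main() {
  BlockCounts DI = { 10.0, 10.0, 10.0, 2, 1 };
  std::printf("consistent: %d\n", flowIsConsistent(DI));
  return 0;
}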
+ const BType *entry = &F.getEntryBlock(); + recurseBasicBlock(entry); + + if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) + ASSERTMESSAGE("Function count and entry block count do not match"); + + return false; + } + + template<class FType, class BType> + char ProfileVerifierPassT<FType, BType>::ID = 0; +} + +INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier", + "Verify profiling information", false, true) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier", + "Verify profiling information", false, true) + +namespace llvm { + FunctionPass *createProfileVerifierPass() { + return new ProfileVerifierPass(ProfileVerifierDisableAssertions); + } +} + diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp new file mode 100644 index 0000000..52753cb --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -0,0 +1,851 @@ +//===- RegionInfo.cpp - SESE region detection analysis --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Detects single entry single exit regions in the control flow graph. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Assembly/Writer.h" + +#define DEBUG_TYPE "region" +#include "llvm/Support/Debug.h" + +#include <set> +#include <algorithm> + +using namespace llvm; + +// Always verify if expensive checking is enabled. +#ifdef XDEBUG +static bool VerifyRegionInfo = true; +#else +static bool VerifyRegionInfo = false; +#endif + +static cl::opt<bool,true> +VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); + +STATISTIC(numRegions, "The # of regions"); +STATISTIC(numSimpleRegions, "The # of simple regions"); + +static cl::opt<enum Region::PrintStyle> printStyle("print-region-style", + cl::Hidden, + cl::desc("style of printing regions"), + cl::values( + clEnumValN(Region::PrintNone, "none", "print no details"), + clEnumValN(Region::PrintBB, "bb", + "print regions in detail with block_iterator"), + clEnumValN(Region::PrintRN, "rn", + "print regions in detail with element_iterator"), + clEnumValEnd)); +//===----------------------------------------------------------------------===// +/// Region Implementation +Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, + DominatorTree *dt, Region *Parent) + : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +Region::~Region() { + // Free the cached nodes. + for (BBNodeMapT::iterator it = BBNodeMap.begin(), + ie = BBNodeMap.end(); it != ie; ++it) + delete it->second; + + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. 
+ BBNodeMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +void Region::replaceEntry(BasicBlock *BB) { + entry.setPointer(BB); +} + +void Region::replaceExit(BasicBlock *BB) { + assert(exit && "No exit to replace!"); + exit = BB; +} + +bool Region::contains(const BasicBlock *B) const { + BasicBlock *BB = const_cast<BasicBlock*>(B); + + assert(DT->getNode(BB) && "BB not part of the dominance tree"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) + && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +bool Region::contains(const Loop *L) const { + // BBs that are not part of any loop are element of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. + if (L == 0) + return getExit() == 0; + + if (!contains(L->getHeader())) + return false; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(), + BE = ExitingBlocks.end(); BI != BE; ++BI) + if (!contains(*BI)) + return false; + + return true; +} + +Loop *Region::outermostLoopInRegion(Loop *L) const { + if (!contains(L)) + return 0; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + Loop *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +BasicBlock *Region::getEnteringBlock() const { + BasicBlock *entry = getEntry(); + BasicBlock *Pred; + BasicBlock *enteringBlock = 0; + + for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; + ++PI) { + Pred = *PI; + if (DT->getNode(Pred) && !contains(Pred)) { + if (enteringBlock) + return 0; + + enteringBlock = Pred; + } + } + + return enteringBlock; +} + +BasicBlock *Region::getExitingBlock() const { + BasicBlock *exit = getExit(); + BasicBlock *Pred; + BasicBlock *exitingBlock = 0; + + if (!exit) + return 0; + + for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; + ++PI) { + Pred = *PI; + if (contains(Pred)) { + if (exitingBlock) + return 0; + + exitingBlock = Pred; + } + } + + return exitingBlock; +} + +bool Region::isSimple() const { + return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); +} + +std::string Region::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + WriteAsOperand(OS, getEntry(), false); + entryName = OS.str(); + } else + entryName = getEntry()->getNameStr(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + WriteAsOperand(OS, getExit(), false); + exitName = OS.str(); + } else + exitName = getExit()->getNameStr(); + } else + exitName = "<Function Return>"; + + return entryName + " => " + exitName; +} + +void Region::verifyBBInRegion(BasicBlock *BB) const { + if (!contains(BB)) + llvm_unreachable("Broken region found!"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (!contains(*SI) && exit != *SI) + llvm_unreachable("Broken region found!"); + + if (entry != BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) + if (!contains(*SI)) + 
llvm_unreachable("Broken region found!"); +} + +void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const { + BasicBlock *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (*SI != exit && visited->find(*SI) == visited->end()) + verifyWalk(*SI, visited); +} + +void Region::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive + // check will be invoked by PassManager. + if (!VerifyRegionInfo) return; + + std::set<BasicBlock*> visited; + verifyWalk(getEntry(), &visited); +} + +void Region::verifyRegionNest() const { + for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->verifyRegionNest(); + + verifyRegion(); +} + +Region::block_iterator Region::block_begin() { + return GraphTraits<FlatIt<Region*> >::nodes_begin(this); +} + +Region::block_iterator Region::block_end() { + return GraphTraits<FlatIt<Region*> >::nodes_end(this); +} + +Region::const_block_iterator Region::block_begin() const { + return GraphTraits<FlatIt<const Region*> >::nodes_begin(this); +} + +Region::const_block_iterator Region::block_end() const { + return GraphTraits<FlatIt<const Region*> >::nodes_end(this); +} + +Region::element_iterator Region::element_begin() { + return GraphTraits<Region*>::nodes_begin(this); +} + +Region::element_iterator Region::element_end() { + return GraphTraits<Region*>::nodes_end(this); +} + +Region::const_element_iterator Region::element_begin() const { + return GraphTraits<const Region*>::nodes_begin(this); +} + +Region::const_element_iterator Region::element_end() const { + return GraphTraits<const Region*>::nodes_end(this); +} + +Region* Region::getSubRegionNode(BasicBlock *BB) const { + Region *R = RI->getRegionFor(BB); + + if (!R || R == this) + return 0; + + // If we pass the BB out of this region, that means our code is broken. 
+ assert(contains(R) && "BB not in current region!"); + + while (contains(R->getParent()) && R->getParent() != this) + R = R->getParent(); + + if (R->getEntry() != BB) + return 0; + + return R; +} + +RegionNode* Region::getBBNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + + BBNodeMapT::const_iterator at = BBNodeMap.find(BB); + + if (at != BBNodeMap.end()) + return at->second; + + RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB); + BBNodeMap.insert(std::make_pair(BB, NewNode)); + return NewNode; +} + +RegionNode* Region::getNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + if (Region* Child = getSubRegionNode(BB)) + return Child->getNode(); + + return getBBNode(BB); +} + +void Region::transferChildrenTo(Region *To) { + for (iterator I = begin(), E = end(); I != E; ++I) { + (*I)->parent = To; + To->children.push_back(*I); + } + children.clear(); +} + +void Region::addSubRegion(Region *SubRegion, bool moveChildren) { + assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); + assert(std::find(begin(), end(), SubRegion) == children.end() + && "Subregion already exists!"); + + SubRegion->parent = this; + children.push_back(SubRegion); + + if (!moveChildren) + return; + + assert(SubRegion->children.size() == 0 + && "SubRegions that contain children are not supported"); + + for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) + if (!(*I)->isSubRegion()) { + BasicBlock *BB = (*I)->getNodeAs<BasicBlock>(); + + if (SubRegion->contains(BB)) + RI->setRegionFor(BB, SubRegion); + } + + std::vector<Region*> Keep; + for (iterator I = begin(), E = end(); I != E; ++I) + if (SubRegion->contains(*I) && *I != SubRegion) { + SubRegion->children.push_back(*I); + (*I)->parent = SubRegion; + } else + Keep.push_back(*I); + + children.clear(); + children.insert(children.begin(), Keep.begin(), Keep.end()); +} + + +Region *Region::removeSubRegion(Region *Child) { + assert(Child->parent == this && "Child is not a child of this region!"); + Child->parent = 0; + RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + assert(I != children.end() && "Region does not exit. 
Unable to remove."); + children.erase(children.begin()+(I-begin())); + return Child; +} + +unsigned Region::getDepth() const { + unsigned Depth = 0; + + for (Region *R = parent; R != 0; R = R->parent) + ++Depth; + + return Depth; +} + +Region *Region::getExpandedRegion() const { + unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); + + if (NumSuccessors == 0) + return NULL; + + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(getEntry(), *PI)) + return NULL; + + Region *R = RI->getRegionFor(exit); + + if (R->getEntry() != exit) { + if (exit->getTerminator()->getNumSuccessors() == 1) + return new Region(getEntry(), *succ_begin(exit), RI, DT); + else + return NULL; + } + + while (R->getParent() && R->getParent()->getEntry() == exit) + R = R->getParent(); + + if (!DT->dominates(getEntry(), R->getExit())) + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(R->getExit(), *PI)) + return NULL; + + return new Region(getEntry(), R->getExit(), RI, DT); +} + +void Region::print(raw_ostream &OS, bool print_tree, unsigned level, + enum PrintStyle Style) const { + if (print_tree) + OS.indent(level*2) << "[" << level << "] " << getNameStr(); + else + OS.indent(level*2) << getNameStr(); + + OS << "\n"; + + + if (Style != PrintNone) { + OS.indent(level*2) << "{\n"; + OS.indent(level*2 + 2); + + if (Style == PrintBB) { + for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last "," + } else if (Style == PrintRN) { + for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last ", + } + + OS << "\n"; + } + + if (print_tree) + for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->print(OS, print_tree, level+1, Style); + + if (Style != PrintNone) + OS.indent(level*2) << "} \n"; +} + +void Region::dump() const { + print(dbgs(), true, getDepth(), printStyle.getValue()); +} + +void Region::clearNodeCache() { + // Free the cached nodes. + for (BBNodeMapT::iterator I = BBNodeMap.begin(), + IE = BBNodeMap.end(); I != IE; ++I) + delete I->second; + + BBNodeMap.clear(); + for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->clearNodeCache(); +} + +//===----------------------------------------------------------------------===// +// RegionInfo implementation +// + +bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, + BasicBlock *exit) const { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(entry, P) && !DT->dominates(exit, P)) + return false; + } + return true; +} + +bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + typedef DominanceFrontier::DomSetType DST; + + DST *entrySuccs = &DF->find(entry)->second; + + // Exit is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) + if (*SI != exit && *SI != entry) + return false; + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. 
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI == exit || *SI == entry) + continue; + if (exitSuccs->find(*SI) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(*SI, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. + for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); + SI != SE; ++SI) + if (DT->properlyDominates(entry, *SI) && *SI != exit) + return false; + + + return true; +} + +void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BasicBlock *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, + BBtoBBMap *ShortCut) const { + BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = succ_end(entry) - succ_begin(entry); + + if (num_successors <= 1 && exit == *(succ_begin(entry))) + return true; + + return false; +} + +void RegionInfo::updateStatistics(Region *R) { + ++numRegions; + + // TODO: Slow. Should only be enabled if -stats is used. + if (R->isSimple()) ++numSimpleRegions; +} + +Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return 0; + + Region *region = new Region(entry, exit, this, DT); + BBtoRegion.insert(std::make_pair(entry, region)); + + #ifdef XDEBUG + region->verifyRegion(); + #else + DEBUG(region->verifyRegion()); + #endif + + updateStatistics(region); + return region; +} + +void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNode *N = PDT->getNode(entry); + + if (!N) + return; + + Region *lastRegion= 0; + BasicBlock *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BasicBlock *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + Region *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. + if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. + if (lastExit != entry) + insertShortCut(entry, lastExit, ShortCut); +} + +void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { + BasicBlock *entry = &(F.getEntryBlock()); + DomTreeNode *N = DT->getNode(entry); + + // Iterate over the dominance tree in post order to start with the small + // regions from the bottom of the dominance tree. 
If the small regions are + // detected first, detection of bigger regions is faster, as we can jump + // over the small regions. + for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE; + ++FI) { + findRegionsWithEntry(FI->getBlock(), ShortCut); + } +} + +Region *RegionInfo::getTopMostParent(Region *region) { + while (region->parent) + region = region->getParent(); + + return region; +} + +void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { + BasicBlock *BB = N->getBlock(); + + // Passed region exit + while (BB == region->getExit()) + region = region->getParent(); + + BBtoRegionMap::iterator it = BBtoRegion.find(BB); + + // This basic block is a start block of a region. It is already in the + // BBtoRegion relation. Only the child basic blocks have to be updated. + if (it != BBtoRegion.end()) { + Region *newRegion = it->second;; + region->addSubRegion(getTopMostParent(newRegion)); + region = newRegion; + } else { + BBtoRegion[BB] = region; + } + + for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) + buildRegionsTree(*CI, region); +} + +void RegionInfo::releaseMemory() { + BBtoRegion.clear(); + if (TopLevelRegion) + delete TopLevelRegion; + TopLevelRegion = 0; +} + +RegionInfo::RegionInfo() : FunctionPass(ID) { + initializeRegionInfoPass(*PassRegistry::getPassRegistry()); + TopLevelRegion = 0; +} + +RegionInfo::~RegionInfo() { + releaseMemory(); +} + +void RegionInfo::Calculate(Function &F) { + // ShortCut a function where for every BB the exit of the largest region + // starting with BB is stored. These regions can be threated as single BBS. + // This improves performance on linear CFGs. + BBtoBBMap ShortCut; + + scanForRegions(F, &ShortCut); + BasicBlock *BB = &F.getEntryBlock(); + buildRegionsTree(DT->getNode(BB), TopLevelRegion); +} + +bool RegionInfo::runOnFunction(Function &F) { + releaseMemory(); + + DT = &getAnalysis<DominatorTree>(); + PDT = &getAnalysis<PostDominatorTree>(); + DF = &getAnalysis<DominanceFrontier>(); + + TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + updateStatistics(TopLevelRegion); + + Calculate(F); + + return false; +} + +void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<PostDominatorTree>(); + AU.addRequired<DominanceFrontier>(); +} + +void RegionInfo::print(raw_ostream &OS, const Module *) const { + OS << "Region tree:\n"; + TopLevelRegion->print(OS, true, 0, printStyle.getValue()); + OS << "End region tree\n"; +} + +void RegionInfo::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. + if (!VerifyRegionInfo) return; + + TopLevelRegion->verifyRegionNest(); +} + +// Region pass manager support. +Region *RegionInfo::getRegionFor(BasicBlock *BB) const { + BBtoRegionMap::const_iterator I= + BBtoRegion.find(BB); + return I != BBtoRegion.end() ? I->second : 0; +} + +void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { + BBtoRegion[BB] = R; +} + +Region *RegionInfo::operator[](BasicBlock *BB) const { + return getRegionFor(BB); +} + +BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { + BasicBlock *Exit = NULL; + + while (true) { + // Get largest region that starts at BB. 
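scanForRegions above walks the dominator tree in post order so that small regions are discovered before the regions enclosing them. A minimal standalone post-order walk over an invented tree, independent of LLVM's po_iterator:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

typedef std::map<std::string, std::vector<std::string> > Tree;

// Post-order: children first, then the node itself, so inner (smaller)
// subtrees are processed before the subtrees that contain them.
static void postOrder(const Tree &T, const std::string &N,
                      std::vector<std::string> &Out) {
  Tree::const_iterator It = T.find(N);
  if (It != T.end())
    for (size_t i = 0; i < It->second.size(); ++i)
      postOrder(T, It->second[i], Out);
  Out.push_back(N);
}

int main() {
  Tree T;
  T["entry"].push_back("a");
  T["entry"].push_back("b");
  T["a"].push_back("c");
  std::vector<std::string> Order;
  postOrder(T, "entry", Order);
  for (size_t i = 0; i < Order.size(); ++i)
    std::printf("%s ", Order[i].c_str());   // prints: c a b entry
  std::printf("\n");
  return 0;
}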
+ Region *R = getRegionFor(BB); + while (R && R->getParent() && R->getParent()->getEntry() == BB) + R = R->getParent(); + + // Get the single exit of BB. + if (R && R->getEntry() == BB) + Exit = R->getExit(); + else if (++succ_begin(BB) == succ_end(BB)) + Exit = *succ_begin(BB); + else // No single exit exists. + return Exit; + + // Get largest region that starts at Exit. + Region *ExitR = getRegionFor(Exit); + while (ExitR && ExitR->getParent() + && ExitR->getParent()->getEntry() == Exit) + ExitR = ExitR->getParent(); + + for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; + ++PI) + if (!R->contains(*PI) && !ExitR->contains(*PI)) + break; + + // This stops infinite cycles. + if (DT->dominates(Exit, BB)) + break; + + BB = Exit; + } + + return Exit; +} + +Region* +RegionInfo::getCommonRegion(Region *A, Region *B) const { + assert (A && B && "One of the Regions is NULL"); + + if (A->contains(B)) return A; + + while (!B->contains(A)) + B = B->getParent(); + + return B; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const { + Region* ret = Regions.back(); + Regions.pop_back(); + + for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(), + E = Regions.end(); I != E; ++I) + ret = getCommonRegion(ret, *I); + + return ret; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const { + Region* ret = getRegionFor(BBs.back()); + BBs.pop_back(); + + for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(), + E = BBs.end(); I != E; ++I) + ret = getCommonRegion(ret, getRegionFor(*I)); + + return ret; +} + +void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB) +{ + Region *R = getRegionFor(OldBB); + + setRegionFor(NewBB, R); + + while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { + R->replaceEntry(NewBB); + R = R->getParent(); + } + + setRegionFor(OldBB, R); +} + +char RegionInfo::ID = 0; +INITIALIZE_PASS_BEGIN(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominanceFrontier) +INITIALIZE_PASS_END(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createRegionInfoPass() { + return new RegionInfo(); + } +} + diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp new file mode 100644 index 0000000..80eda79 --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionPass.cpp @@ -0,0 +1,275 @@ +//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements RegionPass and RGPassManager. All region optimization +// and transformation passes are derived from RegionPass. RGPassManager is +// responsible for managing RegionPasses. 
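getCommonRegion above finds the smallest region containing both arguments by walking one of them up its parent chain until it contains the other; the top-level region contains everything, so the walk terminates. The same idea with a toy region type invented for illustration:

#include <cassert>

// Toy region: knows its parent and which "blocks" (here, a numeric range)
// it contains.  The root has no parent and contains everything.
struct ToyRegion {
  ToyRegion *Parent;
  int Lo, Hi;                                    // inclusive block range
  bool contains(const ToyRegion *R) const {
    return Lo <= R->Lo && R->Hi <= Hi;
  }
};

static ToyRegion *commonRegion(ToyRegion *A, ToyRegion *B) {
  if (A->contains(B)) return A;
  while (!B->contains(A))
    B = B->Parent;                               // the root ends the walk
  return B;
}

int main() {
  ToyRegion Top   = { 0,    0, 100 };
  ToyRegion Left  = { &Top, 0, 40  };
  ToyRegion Right = { &Top, 50, 90 };
  assert(commonRegion(&Left, &Right) == &Top);
  return 0;
}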
+// most of these codes are COPY from LoopPass.cpp +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Support/Timer.h" + +#define DEBUG_TYPE "regionpassmgr" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// RGPassManager +// + +char RGPassManager::ID = 0; + +RGPassManager::RGPassManager(int Depth) + : FunctionPass(ID), PMDataManager(Depth) { + skipThisRegion = false; + redoThisRegion = false; + RI = NULL; + CurrentRegion = NULL; +} + +// Recurse through all subregions and all regions into RQ. +static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) { + RQ.push_back(R); + for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I) + addRegionIntoQueue(*I, RQ); +} + +/// Pass Manager itself does not invalidate any analysis info. +void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired<RegionInfo>(); + Info.setPreservesAll(); +} + +/// run - Execute all of the passes scheduled for execution. Keep track of +/// whether any of the passes modifies the function, and if so, return true. +bool RGPassManager::runOnFunction(Function &F) { + RI = &getAnalysis<RegionInfo>(); + bool Changed = false; + + // Collect inherited analysis from Module level pass manager. + populateInheritedAnalysis(TPM->activeStack); + + addRegionIntoQueue(RI->getTopLevelRegion(), RQ); + + if (RQ.empty()) // No regions, skip calling finalizers + return false; + + // Initialization + for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end(); + I != E; ++I) { + Region *R = *I; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *RP = (RegionPass *)getContainedPass(Index); + Changed |= RP->doInitialization(R, *this); + } + } + + // Walk Regions + while (!RQ.empty()) { + + CurrentRegion = RQ.back(); + skipThisRegion = false; + redoThisRegion = false; + + // Run all passes on the current Region. + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *P = (RegionPass*)getContainedPass(Index); + + dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG, + CurrentRegion->getNameStr()); + dumpRequiredSet(P); + + initializeAnalysisImpl(P); + + { + PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry()); + + TimeRegion PassTimer(getPassTimer(P)); + Changed |= P->runOnRegion(CurrentRegion, *this); + } + + if (Changed) + dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG, + skipThisRegion ? "<deleted>" : + CurrentRegion->getNameStr()); + dumpPreservedSet(P); + + if (!skipThisRegion) { + // Manually check that this region is still healthy. This is done + // instead of relying on RegionInfo::verifyRegion since RegionInfo + // is a function pass and it's really expensive to verify every + // Region in the function every time. That level of checking can be + // enabled with the -verify-region-info option. + { + TimeRegion PassTimer(getPassTimer(P)); + CurrentRegion->verifyRegion(); + } + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(P); + } + + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + removeDeadPasses(P, + skipThisRegion ? "<deleted>" : + CurrentRegion->getNameStr(), + ON_REGION_MSG); + + if (skipThisRegion) + // Do not run other passes on this region. + break; + } + + // If the region was deleted, release all the region passes. 
This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisRegion) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "<deleted>", ON_REGION_MSG); + } + + // Pop the region from queue after running all passes. + RQ.pop_back(); + + if (redoThisRegion) + RQ.push_back(CurrentRegion); + + // Free all region nodes created in region passes. + RI->clearNodeCache(); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *P = (RegionPass*)getContainedPass(Index); + Changed |= P->doFinalization(); + } + + // Print the region tree after all pass. + DEBUG( + dbgs() << "\nRegion tree of function " << F.getName() + << " after all region Pass:\n"; + RI->dump(); + dbgs() << "\n"; + ); + + return Changed; +} + +/// Print passes managed by this manager +void RGPassManager::dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Region Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + +namespace { +//===----------------------------------------------------------------------===// +// PrintRegionPass +class PrintRegionPass : public RegionPass { +private: + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + +public: + static char ID; + PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} + PrintRegionPass(const std::string &B, raw_ostream &o) + : RegionPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual bool runOnRegion(Region *R, RGPassManager &RGM) { + Out << Banner; + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); + I != E; ++I) + (*I)->getEntry()->print(Out); + + return false; + } +}; + +char PrintRegionPass::ID = 0; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// RegionPass + +// Check if this pass is suitable for the current RGPassManager, if +// available. This pass P is not suitable for a RGPassManager if P +// is not preserving higher level analysis info used by other +// RGPassManager passes. In such case, pop RGPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. +void RegionPass::preparePassManager(PMStack &PMS) { + + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new RGPassManager. + if (PMS.top()->getPassManagerType() == PMT_RegionPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void RegionPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + RGPassManager *RGPM; + + // Create new Region Pass Manager if it does not exist. 
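RGPassManager::runOnFunction above flattens the region tree into a deque and repeatedly runs its passes on the back element, re-queueing a region when redoThisRegion is set. A simplified standalone sketch of that worklist shape; the node and queue types here are placeholders, not LLVM's:

#include <cstdio>
#include <deque>
#include <vector>

struct Node { const char *Name; std::vector<Node*> Children; };

// Mirror addRegionIntoQueue: push the node itself, then all of its subtrees.
static void addIntoQueue(Node *N, std::deque<Node*> &Q) {
  Q.push_back(N);
  for (size_t i = 0; i < N->Children.size(); ++i)
    addIntoQueue(N->Children[i], Q);
}

int main() {
  Node A, B, C;
  A.Name = "A"; B.Name = "B"; C.Name = "C";
  A.Children.push_back(&B);
  A.Children.push_back(&C);

  std::deque<Node*> Q;
  addIntoQueue(&A, Q);

  // Process the back of the queue first, as RGPassManager does; deeper
  // regions were pushed later, so they are handled before their parents.
  while (!Q.empty()) {
    Node *Cur = Q.back();
    std::printf("running passes on %s\n", Cur->Name);
    Q.pop_back();
  }
  return 0;
}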
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager) + RGPM = (RGPassManager*)PMS.top(); + else { + + assert (!PMS.empty() && "Unable to create Region Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Region Pass Manager + RGPM = new RGPassManager(PMD->getDepth() + 1); + RGPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(RGPM); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + TPM->schedulePass(RGPM); + + // [4] Push new manager into PMS + PMS.push(RGPM); + } + + RGPM->add(this); +} + +/// Get the printer pass +Pass *RegionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintRegionPass(Banner, O); +} diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp new file mode 100644 index 0000000..a1730b0 --- /dev/null +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -0,0 +1,220 @@ +//===- RegionPrinter.cpp - Print regions tree pass ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Print out the region tree of a function using dotty/graphviz. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/RegionPrinter.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// onlySimpleRegion - Show only the simple regions in the RegionViewer. 
+static cl::opt<bool> +onlySimpleRegions("only-simple-regions", + cl::desc("Show only simple regions in the graphviz viewer"), + cl::Hidden, + cl::init(false)); + +namespace llvm { +template<> +struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) { + + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs<BasicBlock>(); + + if (isSimple()) + return DOTGraphTraits<const Function*> + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits<const Function*> + ::getCompleteNodeLabel(BB, BB->getParent()); + } + + return "Not implemented"; + } +}; + +template<> +struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits<RegionNode*>(isSimple) {} + + static std::string getGraphName(RegionInfo *DT) { + return "Region Graph"; + } + + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, + G->getTopLevelRegion()); + } + + std::string getEdgeAttributes(RegionNode *srcNode, + GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfo *RI) { + + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. + BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>(); + BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); + + Region *R = RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + + // Print the cluster of the subregions. This groups the single basic blocks + // and adds a different background color for each group. 
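getEdgeAttributes above marks back edges with constraint=false so they do not distort the graphviz ranking. A tiny standalone program that emits the same kind of dot output for a hand-written two-block loop (the graph itself is invented):

#include <cstdio>

int main() {
  // A two-block loop: header -> body -> header.  The edge back to the
  // header is drawn with constraint=false so it does not affect layout.
  std::printf("digraph CFG {\n");
  std::printf("  header -> body;\n");
  std::printf("  body -> header [constraint=false];\n");
  std::printf("}\n");
  return 0;
}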
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW, + unsigned depth = 0) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + if (!onlySimpleRegions || R->isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 2) << "\n"; + } + + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + printRegionCluster(*RI, GW, depth + 1); + + RegionInfo *RI = R->getRegionInfo(); + + for (Region::const_block_iterator BI = R->block_begin(), + BE = R->block_end(); BI != BE; ++BI) { + BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>(); + if (RI->getRegionFor(BB) == R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + } + + O.indent(2 * depth) << "}\n"; + } + + static void addCustomGraphFeatures(const RegionInfo* RI, + GraphWriter<RegionInfo*> &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(RI->getTopLevelRegion(), GW, 4); + } +}; +} //end namespace llvm + +namespace { + +struct RegionViewer + : public DOTGraphTraitsViewer<RegionInfo, false> { + static char ID; + RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){ + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionViewer::ID = 0; + +struct RegionOnlyViewer + : public DOTGraphTraitsViewer<RegionInfo, true> { + static char ID; + RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID) { + initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyViewer::ID = 0; + +struct RegionPrinter + : public DOTGraphTraitsPrinter<RegionInfo, false> { + static char ID; + RegionPrinter() : + DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; +} //end anonymous namespace + +INITIALIZE_PASS(RegionPrinter, "dot-regions", + "Print regions of function to 'dot' file", true, true) + +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true) + +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true) + +namespace { + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter<RegionInfo, true> { + static char ID; + RegionOnlyPrinter() : + DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} + +char RegionOnlyPrinter::ID = 0; +INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file " + "(with no function bodies)", + true, true) + +FunctionPass* llvm::createRegionViewerPass() { + return new RegionViewer(); +} + +FunctionPass* llvm::createRegionOnlyViewerPass() { + return new RegionOnlyViewer(); +} + +FunctionPass* llvm::createRegionPrinterPass() { + return new RegionPrinter(); +} + +FunctionPass* llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); +} + diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp new file mode 100644 index 0000000..025718e --- /dev/null 
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -0,0 +1,6432 @@ +//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution analysis +// engine, which is used primarily to analyze expressions involving induction +// variables in loops. +// +// There are several aspects to this library. First is the representation of +// scalar expressions, which are represented as subclasses of the SCEV class. +// These classes are used to represent certain types of subexpressions that we +// can handle. We only create one SCEV of a particular shape, so +// pointer-comparisons for equality are legal. +// +// One important aspect of the SCEV objects is that they are never cyclic, even +// if there is a cycle in the dataflow for an expression (ie, a PHI node). If +// the PHI node is one of the idioms that we can represent (e.g., a polynomial +// recurrence) then we represent it directly as a recurrence node, otherwise we +// represent it as a SCEVUnknown node. +// +// In addition to being able to represent expressions of various types, we also +// have folders that are used to build the *canonical* representation for a +// particular expression. These folders are capable of using a variety of +// rewrite rules to simplify the expressions. +// +// Once the folders are defined, we can implement the more interesting +// higher-level code, such as the code that recognizes PHI nodes of various +// types, computes the execution count of a loop, etc. +// +// TODO: We should use these routines and value representations to implement +// dependence analysis! +// +//===----------------------------------------------------------------------===// +// +// There are several good references for the techniques used in this analysis. +// +// Chains of recurrences -- a method to expedite the evaluation +// of closed-form functions +// Olaf Bachmann, Paul S. Wang, Eugene V. Zima +// +// On computational properties of chains of recurrences +// Eugene V. Zima +// +// Symbolic Evaluation of Chains of Recurrences for Loop Optimization +// Robert A. van Engelen +// +// Efficient Symbolic Analysis for Optimizing Compilers +// Robert A. 
van Engelen +// +// Using the chains of recurrences algebra for data dependence testing and +// induction variable substitution +// MS Thesis, Johnie Birch +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalar-evolution" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <algorithm> +using namespace llvm; + +STATISTIC(NumArrayLenItCounts, + "Number of trip counts computed with array length"); +STATISTIC(NumTripCountsComputed, + "Number of loops with predictable loop counts"); +STATISTIC(NumTripCountsNotComputed, + "Number of loops without predictable loop counts"); +STATISTIC(NumBruteForceTripCountsComputed, + "Number of loops with trip counts computed by force"); + +static cl::opt<unsigned> +MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant " + "derived loop"), + cl::init(100)); + +INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +char ScalarEvolution::ID = 0; + +//===----------------------------------------------------------------------===// +// SCEV class definitions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Implementation of the SCEV class. 
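The header comment notes that only one SCEV of each shape is ever created, which is what makes pointer comparison a valid equality test. A minimal standalone sketch of that uniquing (hash-consing) idea with an invented expression type; the real code interns nodes in a FoldingSet rather than a std::map:

#include <cassert>
#include <map>
#include <utility>

// A toy "expression": an opcode plus one small operand.  Structurally
// identical expressions are interned in a table, so equal expressions share
// one object and pointer equality is structural equality.  Interned nodes
// live for the lifetime of the program in this sketch.
struct Expr { int Opcode; int Operand; };

static Expr *getExpr(int Opcode, int Operand) {
  typedef std::map<std::pair<int, int>, Expr*> Table;
  static Table Interned;
  std::pair<int, int> Key(Opcode, Operand);
  Table::iterator It = Interned.find(Key);
  if (It != Interned.end())
    return It->second;                       // reuse the existing node
  Expr *E = new Expr();
  E->Opcode = Opcode;
  E->Operand = Operand;
  Interned[Key] = E;
  return E;
}

int main() {
  assert(getExpr(1, 42) == getExpr(1, 42));  // same shape, same pointer
  assert(getExpr(1, 42) != getExpr(2, 42));
  return 0;
}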
+// + +void SCEV::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void SCEV::print(raw_ostream &OS) const { + switch (getSCEVType()) { + case scConstant: + WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false); + return; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); + const SCEV *Op = Trunc->getOperand(); + OS << "(trunc " << *Op->getType() << " " << *Op << " to " + << *Trunc->getType() << ")"; + return; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); + const SCEV *Op = ZExt->getOperand(); + OS << "(zext " << *Op->getType() << " " << *Op << " to " + << *ZExt->getType() << ")"; + return; + } + case scSignExtend: { + const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); + const SCEV *Op = SExt->getOperand(); + OS << "(sext " << *Op->getType() << " " << *Op << " to " + << *SExt->getType() << ")"; + return; + } + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); + OS << "{" << *AR->getOperand(0); + for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) + OS << ",+," << *AR->getOperand(i); + OS << "}<"; + if (AR->getNoWrapFlags(FlagNUW)) + OS << "nuw><"; + if (AR->getNoWrapFlags(FlagNSW)) + OS << "nsw><"; + if (AR->getNoWrapFlags(FlagNW) && + !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) + OS << "nw><"; + WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + OS << ">"; + return; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); + const char *OpStr = 0; + switch (NAry->getSCEVType()) { + case scAddExpr: OpStr = " + "; break; + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + OS << **I; + if (llvm::next(I) != E) + OS << OpStr; + } + OS << ")"; + return; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); + OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; + return; + } + case scUnknown: { + const SCEVUnknown *U = cast<SCEVUnknown>(this); + const Type *AllocTy; + if (U->isSizeOf(AllocTy)) { + OS << "sizeof(" << *AllocTy << ")"; + return; + } + if (U->isAlignOf(AllocTy)) { + OS << "alignof(" << *AllocTy << ")"; + return; + } + + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo)) { + OS << "offsetof(" << *CTy << ", "; + WriteAsOperand(OS, FieldNo, false); + OS << ")"; + return; + } + + // Otherwise just print it normally. 
+ WriteAsOperand(OS, U->getValue(), false); + return; + } + case scCouldNotCompute: + OS << "***COULDNOTCOMPUTE***"; + return; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); +} + +const Type *SCEV::getType() const { + switch (getSCEVType()) { + case scConstant: + return cast<SCEVConstant>(this)->getType(); + case scTruncate: + case scZeroExtend: + case scSignExtend: + return cast<SCEVCastExpr>(this)->getType(); + case scAddRecExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + return cast<SCEVNAryExpr>(this)->getType(); + case scAddExpr: + return cast<SCEVAddExpr>(this)->getType(); + case scUDivExpr: + return cast<SCEVUDivExpr>(this)->getType(); + case scUnknown: + return cast<SCEVUnknown>(this)->getType(); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return 0; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return 0; +} + +bool SCEV::isZero() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isZero(); + return false; +} + +bool SCEV::isOne() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isOne(); + return false; +} + +bool SCEV::isAllOnesValue() const { + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) + return SC->getValue()->isAllOnesValue(); + return false; +} + +SCEVCouldNotCompute::SCEVCouldNotCompute() : + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} + +bool SCEVCouldNotCompute::classof(const SCEV *S) { + return S->getSCEVType() == scCouldNotCompute; +} + +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { + FoldingSetNodeID ID; + ID.AddInteger(scConstant); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); +} + +const SCEV * +ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { + const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); +} + +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, const Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate non-integer value!"); +} + +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot zero extend non-integer value!"); +} + +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot sign extend non-integer value!"); +} + +void SCEVUnknown::deleted() { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. 
+ SE->UniqueSCEVs.RemoveNode(this); + + // Release the value. + setValPtr(0); +} + +void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Update this SCEVUnknown to point to the new value. This is needed + // because there may still be outstanding SCEVs which still point to + // this SCEVUnknown. + setValPtr(New); +} + +bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue() && + CE->getNumOperands() == 2) + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) + if (CI->isOne()) { + AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) + ->getElementType(); + return true; + } + + return false; +} + +bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isPacked() && + CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + AllocTy = STy->getElementType(1); + return true; + } + } + } + + return false; +} + +bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { + if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getNumOperands() == 3 && + CE->getOperand(0)->isNullValue() && + CE->getOperand(1)->isNullValue()) { + const Type *Ty = + cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); + // Ignore vector types here so that ScalarEvolutionExpander doesn't + // emit getelementptrs that index into vectors. + if (Ty->isStructTy() || Ty->isArrayTy()) { + CTy = Ty; + FieldNo = CE->getOperand(2); + return true; + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// SCEV Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// SCEVComplexityCompare - Return true if the complexity of the LHS is less + /// than the complexity of the RHS. This comparator is used to canonicalize + /// expressions. + class SCEVComplexityCompare { + const LoopInfo *const LI; + public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} + + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } + + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. 
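SCEVComplexityCompare above exposes a three-way compare() and adapts it to the strict weak ordering expected by std::stable_sort in operator(). The same pattern in isolation, with plain ints standing in for SCEVs:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Cmp {
  // Three-way comparison: negative, zero, or positive.  Recursive callers
  // can reuse the sign instead of comparing twice.  (LHS - RHS is fine for
  // the small values used here.)
  static int compare(int LHS, int RHS) { return LHS - RHS; }
  // Adaptor: a strict weak ordering for the standard sort algorithms.
  bool operator()(int LHS, int RHS) const { return compare(LHS, RHS) < 0; }
};

int main() {
  int Raw[] = { 3, 1, 2, 1 };
  std::vector<int> V(Raw, Raw + 4);
  std::stable_sort(V.begin(), V.end(), Cmp());
  for (size_t i = 0; i < V.size(); ++i)
    std::printf("%d ", V[i]);                // prints: 1 1 2 3
  std::printf("\n");
  return 0;
}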
+ int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (LType) { + case scUnknown: { + const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); + const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast<Argument>(LV)) { + const Argument *RA = cast<Argument>(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; + } + + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { + const Instruction *RInst = cast<Instruction>(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } + + return 0; + } + + case scConstant: { + const SCEVConstant *LC = cast<SCEVConstant>(LHS); + const SCEVConstant *RC = cast<SCEVConstant>(RHS); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); + const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. 
+ for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; + } + + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); + const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); + const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); + const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); + } + + default: + break; + } + + llvm_unreachable("Unknown SCEV kind!"); + return 0; + } + }; +} + +/// GroupByComplexity - Given a list of SCEV objects, order them by their +/// complexity, and group objects of the same complexity together by value. +/// When this routine is finished, we know that any duplicates in the vector are +/// consecutive and that complexity is monotonically increasing. +/// +/// Note that we go take special precautions to ensure that we get deterministic +/// results from this routine. In other words, we don't want the results of +/// this to depend on where the addresses of various SCEV objects happened to +/// land in memory. +/// +static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, + LoopInfo *LI) { + if (Ops.size() < 2) return; // Noop + if (Ops.size() == 2) { + // This is the common case, which also happens to be trivially simple. + // Special case it. + const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; + if (SCEVComplexityCompare(LI)(RHS, LHS)) + std::swap(LHS, RHS); + return; + } + + // Do the rough sort by complexity. + std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + + // Now that we are sorted by complexity, group elements of the same + // complexity. Note that this is, at worst, N^2, but the vector is likely to + // be extremely short in practice. Note that we take this approach because we + // do not want to depend on the addresses of the objects we are grouping. + for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { + const SCEV *S = Ops[i]; + unsigned Complexity = S->getSCEVType(); + + // If there are any objects of the same complexity and same value as this + // one, group them. + for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { + if (Ops[j] == S) { // Found a duplicate. + // Move it to immediately after i'th element. + std::swap(Ops[i+1], Ops[j]); + ++i; // no need to rescan it. + if (i == e-2) return; // Done! + } + } + } +} + + + +//===----------------------------------------------------------------------===// +// Simple SCEV method implementations +//===----------------------------------------------------------------------===// + +/// BinomialCoefficient - Compute BC(It, K). The result has width W. +/// Assume, K > 0. 
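+///
+/// For example, BC(It, 3) = It*(It-1)*(It-2)/6. At width W = 32 the code
+/// below computes it as follows: 3! = 6 = 2^1 * 3, so T = 1 and the odd part
+/// of 3! is 3. The product It*(It-1)*(It-2) is formed at W + T = 33 bits,
+/// divided by 2^1, truncated back to 32 bits, and multiplied by 0xAAAAAAAB,
+/// the multiplicative inverse of 3 modulo 2^32.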
+static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
+ ScalarEvolution &SE,
+ const Type* ResultTy) {
+ // Handle the simplest case efficiently.
+ if (K == 1)
+ return SE.getTruncateOrZeroExtend(It, ResultTy);
+
+ // We are using the following formula for BC(It, K):
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K!
+ //
+ // Suppose W is the bitwidth of the return value. We must be prepared for
+ // overflow. Hence, we must ensure that the result of our computation is
+ // equal to the accurate one modulo 2^W. Unfortunately, division isn't
+ // safe in modular arithmetic.
+ //
+ // However, this code doesn't use exactly that formula; the formula it uses
+ // is something like the following, where T is the number of factors of 2 in
+ // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
+ // exponentiation:
+ //
+ // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T)
+ //
+ // This formula is trivially equivalent to the previous formula. However,
+ // this formula can be implemented much more efficiently. The trick is that
+ // K! / 2^T is odd, and exact division by an odd number *is* safe in modular
+ // arithmetic. To do exact division in modular arithmetic, all we have
+ // to do is multiply by the inverse. Therefore, this step can be done at
+ // width W.
+ //
+ // The next issue is how to safely do the division by 2^T. The way this
+ // is done is by doing the multiplication step at a width of at least W + T
+ // bits. This way, the bottom W+T bits of the product are accurate. Then,
+ // when we perform the division by 2^T (which is equivalent to a right shift
+ // by T), the bottom W bits are accurate. Extra bits are okay; they'll get
+ // truncated out after the division by 2^T.
+ //
+ // In comparison to just directly using the first formula, this technique
+ // is much more efficient; using the first formula requires W * K bits,
+ // but this formula requires less than W + K bits. Also, the first formula
+ // requires a division step, whereas this formula only requires multiplies
+ // and shifts.
+ //
+ // It doesn't matter whether the subtraction step is done in the calculation
+ // width or the input iteration count's width; if the subtraction overflows,
+ // the result must be zero anyway. We prefer here to do it in the width of
+ // the induction variable because it helps a lot for certain cases; CodeGen
+ // isn't smart enough to ignore the overflow, which leads to much less
+ // efficient code if the width of the subtraction is wider than the native
+ // register width.
+ //
+ // (It's possible to not widen at all by pulling out factors of 2 before
+ // the multiplication; for example, K=2 can be calculated as
+ // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires
+ // extra arithmetic, so it's not an obvious win, and it gets
+ // much more complicated for K > 3.)
+
+ // Protection from insane SCEVs; this bound is conservative,
+ // but it probably doesn't matter.
+ if (K > 1000)
+ return SE.getCouldNotCompute();
+
+ unsigned W = SE.getTypeSizeInBits(ResultTy);
+
+ // Calculate K! / 2^T and T; we divide out the factors of two before
+ // multiplying, to avoid overflow when calculating K! / 2^T.
+ // Other overflow doesn't matter because we only care about the bottom
+ // W bits of the result.
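+ // For example, for K = 4: 4! = 24 = 2^3 * 3, so the loop below ends with
+ // T = 3 and OddFactorial = 3. (T starts at 1 to account for the factor of
+ // two contributed by i == 2, which the loop skips.)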
+ APInt OddFactorial(W, 1); + unsigned T = 1; + for (unsigned i = 3; i <= K; ++i) { + APInt Mult(W, i); + unsigned TwoFactors = Mult.countTrailingZeros(); + T += TwoFactors; + Mult = Mult.lshr(TwoFactors); + OddFactorial *= Mult; + } + + // We need at least W + T bits for the multiplication step + unsigned CalculationBits = W + T; + + // Calculate 2^T, at width T+W. + APInt DivFactor = APInt(CalculationBits, 1).shl(T); + + // Calculate the multiplicative inverse of K! / 2^T; + // this multiplication factor will perform the exact division by + // K! / 2^T. + APInt Mod = APInt::getSignedMinValue(W+1); + APInt MultiplyFactor = OddFactorial.zext(W+1); + MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); + MultiplyFactor = MultiplyFactor.trunc(W); + + // Calculate the product, at width T+W + const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + CalculationBits); + const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + for (unsigned i = 1; i != K; ++i) { + const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); + Dividend = SE.getMulExpr(Dividend, + SE.getTruncateOrZeroExtend(S, CalculationTy)); + } + + // Divide by 2^T + const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + + // Truncate the result, and divide by K! / 2^T. + + return SE.getMulExpr(SE.getConstant(MultiplyFactor), + SE.getTruncateOrZeroExtend(DivResult, ResultTy)); +} + +/// evaluateAtIteration - Return the value of this chain of recurrences at +/// the specified iteration number. We can evaluate this recurrence by +/// multiplying each element in the chain by the binomial coefficient +/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as: +/// +/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) +/// +/// where BC(It, k) stands for binomial coefficient. +/// +const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + ScalarEvolution &SE) const { + const SCEV *Result = getStart(); + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + // The computation is correct in the face of overflow provided that the + // multiplication is performed _after_ the evaluation of the binomial + // coefficient. + const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); + if (isa<SCEVCouldNotCompute>(Coeff)) + return Coeff; + + Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); + } + return Result; +} + +//===----------------------------------------------------------------------===// +// SCEV Expression folder implementations +//===----------------------------------------------------------------------===// + +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // Fold if the operand is constant. 
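+ // For example, a trunc of the i32 constant 300 to i8 folds directly to the
+ // i8 constant 44 (300 mod 256).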
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) + return getTruncateExpr(ST->getOperand(), Ty); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) + return getTruncateOrSignExtend(SS->getOperand(), Ty); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + + // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can + // eliminate all the truncates. + if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); + hasTrunc = isa<SCEVTruncateExpr>(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getAddExpr(Operands); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + + // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can + // eliminate all the truncates. + if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); + hasTrunc = isa<SCEVTruncateExpr>(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getMulExpr(Operands); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + + // If the input value is a chrec scev, truncate the chrec's operands. + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); + } + + // As a special case, fold trunc(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op)) + if (isa<UndefValue>(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // The cast wasn't folded; create an explicit cast node. We can reuse + // the existing insert position since if we get here, we won't have + // made any changes which would invalidate it. + SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. 
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // zext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // zext(trunc(x)) --> zext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { + // It's possible the bits taken off by the truncate were all zero bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getUnsignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( + CR.zextOrTrunc(NewBits))) + return getTruncateOrZeroExtend(X, Ty); + } + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can zero extend all of the + // operands (often constants). This allows analysis of something like + // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->getNoWrapFlags(SCEV::FlagNUW)) + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. 
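+ // The strategy: compute Start + Step*MaxBECount in the original width and
+ // zero-extend the sum, then redo the same addition with every operand
+ // pre-extended to twice the width. If the two results agree, the narrow
+ // computation cannot have wrapped.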
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, SMul); + OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - + getUnsignedRange(Step).getUnsignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. 
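+ // (The recursive SCEV construction above may have added new nodes to
+ // UniqueSCEVs, so the insert position captured at the top of this function
+ // can be stale.)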
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +// Get the limit of a recurrence such that incrementing by Step cannot cause +// signed overflow as long as the value of the recurrence within the loop does +// not exceed this limit before incrementing. +static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + if (SE->isKnownPositive(Step)) { + *Pred = ICmpInst::ICMP_SLT; + return SE->getConstant(APInt::getSignedMinValue(BitWidth) - + SE->getSignedRange(Step).getSignedMax()); + } + if (SE->isKnownNegative(Step)) { + *Pred = ICmpInst::ICMP_SGT; + return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - + SE->getSignedRange(Step).getSignedMin()); + } + return 0; +} + +// The recurrence AR has been shown to have no signed wrap. Typically, if we can +// prove NSW for AR, then we can just as easily prove NSW for its preincrement +// or postincrement sibling. This allows normalizing a sign extended AddRec as +// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a +// result, the expression "Step + sext(PreIncAR)" is congruent with +// "sext(PostIncAR)" +static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const Loop *L = AR->getLoop(); + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // Check for a simple looking step prior to loop entry. + const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); + if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step) + return 0; + + // This is a postinc AR. Check for overflow on the preinc recurrence using the + // same three conditions that getSignExtendedExpr checks. + + // 1. NSW flags on the step increment. + const SCEV *PreStart = SA->getOperand(1); + const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( + SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); + + if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) + return PreStart; + + // 2. Direct overflow check on the step operation's expression. + unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); + const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + const SCEV *OperandExtendedStart = + SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), + SE->getSignExtendExpr(Step, WideTy)); + if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) { + // Cache knowledge of PreAR NSW. + if (PreAR) + const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW); + // FIXME: this optimization needs a unit test + DEBUG(dbgs() << "SCEV: untested prestart overflow check\n"); + return PreStart; + } + + // 3. Loop precondition. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE); + + if (OverflowLimit && + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + return PreStart; + } + return 0; +} + +// Get the normalized sign-extended expression for this AddRec's Start. 
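+// If getPreStartForSignExtend can show that the pre-increment recurrence
+// does not overflow, the start is rewritten as sext(Step) + sext(PreStart);
+// otherwise we simply sign-extend the start value.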
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); + if (!PreStart) + return SE->getSignExtendExpr(AR->getStart(), Ty); + + return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty), + SE->getSignExtendExpr(PreStart, Ty)); +} + +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // sext(sext(x)) --> sext(x) + if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) + return getSignExtendExpr(SS->getOperand(), Ty); + + // sext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scSignExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // If the input value is provably positive, build a zext instead. + if (isKnownNonNegative(Op)) + return getZeroExtendExpr(Op, Ty); + + // sext(trunc(x)) --> sext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { + // It's possible the bits taken off by the truncate were all sign bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getSignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).signExtend(NewBits).contains( + CR.sextOrTrunc(NewBits))) + return getTruncateOrSignExtend(X, Ty); + } + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can sign extend all of the + // operands (often constants). This allows analysis of something like + // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->getNoWrapFlags(SCEV::FlagNSW)) + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getSignExtendExpr(Step, Ty), + L, SCEV::FlagNSW); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. 
+ const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa<SCEVCouldNotCompute>(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getZeroExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this); + if (OverflowLimit && + (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || + (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && + isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), + OverflowLimit)))) { + // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +/// getAnyExtendExpr - Return a SCEV for the given operand extended with +/// unspecified bits out to the given type. 
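+/// Because the extra bits are unspecified, the result is whichever extension
+/// happens to fold best: negative constants are sign-extended, a truncate is
+/// peeled off, and a zero- or sign-extension that folds away completely is
+/// preferred over creating a new cast node.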
+/// +const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Sign-extend negative constants. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + if (SC->getValue()->getValue().isNegative()) + return getSignExtendExpr(Op, Ty); + + // Peel off a truncate cast. + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { + const SCEV *NewOp = T->getOperand(); + if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) + return getAnyExtendExpr(NewOp, Ty); + return getTruncateOrNoop(NewOp, Ty); + } + + // Next try a zext cast. If the cast is folded, use it. + const SCEV *ZExt = getZeroExtendExpr(Op, Ty); + if (!isa<SCEVZeroExtendExpr>(ZExt)) + return ZExt; + + // Next try a sext cast. If the cast is folded, use it. + const SCEV *SExt = getSignExtendExpr(Op, Ty); + if (!isa<SCEVSignExtendExpr>(SExt)) + return SExt; + + // Force the cast to be folded into the operands of an addrec. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { + SmallVector<const SCEV *, 4> Ops; + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + Ops.push_back(getAnyExtendExpr(*I, Ty)); + return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); + } + + // As a special case, fold anyext(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op)) + if (isa<UndefValue>(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // If the expression is obviously signed, use the sext cast value. + if (isa<SCEVSMaxExpr>(Op)) + return SExt; + + // Absent any other information, use the zext cast value. + return ZExt; +} + +/// CollectAddOperandsWithScales - Process the given Ops list, which is +/// a list of operands to be added under the given scale, update the given +/// map. This is a helper function for getAddRecExpr. As an example of +/// what it does, given a sequence of operands that would form an add +/// expression like this: +/// +/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// +/// where A and B are constants, update the map with these values: +/// +/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) +/// +/// and add 13 + A*B*29 to AccumulatedConstant. +/// This will allow getAddRecExpr to produce this: +/// +/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) +/// +/// This form often exposes folding opportunities that are hidden in +/// the original operand list. +/// +/// Return true iff it appears that any interesting folding opportunities +/// may be exposed. This helps getAddRecExpr short-circuit extra work in +/// the common case where no interesting opportunities are present, and +/// is also used as a check to avoid infinite recursion. +/// +static bool +CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, + SmallVector<const SCEV *, 8> &NewOps, + APInt &AccumulatedConstant, + const SCEV *const *Ops, size_t NumOperands, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. 
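+ // In the example above, the constant 29 is reached with Scale == A*B, so it
+ // contributes A*B*29 to AccumulatedConstant.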
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); + if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Add->op_begin(), Add->getNumOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Key, NewScale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else { + // An ordinary operand. Update the map. + std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], Scale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + +/// getAddExpr - Get a canonical add expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags) { + assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && + "only nuw or nsw allowed"); + assert(!Ops.empty() && "Cannot get empty add!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVAddExpr operand types don't match!"); +#endif + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. 
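+ // GroupByComplexity sorted any constants to the front of the list, so they
+ // are all adjacent here; e.g. (1 + x + 2) has been ordered as (1 + 2 + x)
+ // and folds to (3 + x). A leftover zero constant is stripped entirely.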
+ unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; + Ops.erase(Ops.begin()+1); // Erase the folded element + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant zero being added, strip it off. + if (LHSC->getValue()->isZero()) { + Ops.erase(Ops.begin()); + --Idx; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Okay, check to see if the same value occurs in the operand list more than + // once. If so, merge them together into an multiply expression. Since we + // sorted the list, these values are required to be adjacent. + const Type *Ty = Ops[0]->getType(); + bool FoundMatch = false; + for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) + if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 + // Scan ahead to count how many equal operands there are. + unsigned Count = 2; + while (i+Count != e && Ops[i+Count] == Ops[i]) + ++Count; + // Merge the values into a multiply. + const SCEV *Scale = getConstant(Ty, Count); + const SCEV *Mul = getMulExpr(Scale, Ops[i]); + if (Ops.size() == Count) + return Mul; + Ops[i] = Mul; + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); + --i; e -= Count - 1; + FoundMatch = true; + } + if (FoundMatch) + return getAddExpr(Ops, Flags); + + // Check for truncates. If all the operands are truncated from the same + // type, see if factoring out the truncate would permit the result to be + // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // if the contents of the resulting outer trunc fold to something simple. + for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { + const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); + const Type *DstType = Trunc->getType(); + const Type *SrcType = Trunc->getOperand()->getType(); + SmallVector<const SCEV *, 8> LargeOps; + bool Ok = true; + // Check all the operands to see if they can be represented in the + // source type of the truncate. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); + } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { + SmallVector<const SCEV *, 8> LargeMulOps; + for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { + if (const SCEVTruncateExpr *T = + dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeMulOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = + dyn_cast<SCEVConstant>(M->getOperand(j))) { + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); + } else { + Ok = false; + break; + } + } + if (Ok) + LargeOps.push_back(getMulExpr(LargeMulOps)); + } else { + Ok = false; + break; + } + } + if (Ok) { + // Evaluate the expression in the larger type. + const SCEV *Fold = getAddExpr(LargeOps, Flags); + // If it folds to something simple, use it. Otherwise, don't. 
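+ // "Simple" here means a plain constant or a SCEVUnknown; anything more
+ // structured would leave us with a truncate of a complex expression, which
+ // is no better than the truncates we started with.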
+ if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) + return getTruncateExpr(Fold, DstType); + } + } + + // Skip past any other cast SCEVs. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) + ++Idx; + + // If there are add operands they would be next. + if (Idx < Ops.size()) { + bool DeletedAdd = false; + while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { + // If we have an add, expand the add operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); + DeletedAdd = true; + } + + // If we deleted at least one add, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedAdd) + return getAddExpr(Ops); + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. + if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap<const SCEV *, APInt> M; + SmallVector<const SCEV *, 8> NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so its worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; + for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. + Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator + I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), + getAddExpr(I->second))); + if (Ops.empty()) + return getConstant(Ty, 0); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + + // If we are adding something to a multiply expression, make sure the + // something is not already an operand of the multiply. If so, merge it into + // the multiply. + for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) { + const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); + for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { + const SCEV *MulOpSCEV = Mul->getOperand(MulOp); + if (isa<SCEVConstant>(MulOpSCEV)) + continue; + for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) + if (MulOpSCEV == Ops[AddOp]) { + // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + // If the multiply has more than two operands, we must get the + // Y*Z term. 
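+ // That is, rebuild the multiply from every operand except MulOp itself,
+ // so for X*Y*Z with MulOp == X the remaining product is Y*Z.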
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul = getMulExpr(MulOps); + } + const SCEV *One = getConstant(Ty, 1); + const SCEV *AddOne = getAddExpr(One, InnerMul); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + if (Ops.size() == 2) return OuterMul; + if (AddOp < Idx) { + Ops.erase(Ops.begin()+AddOp); + Ops.erase(Ops.begin()+Idx-1); + } else { + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+AddOp-1); + } + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + + // Check this multiply against other multiplies being added together. + for (unsigned OtherMulIdx = Idx+1; + OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]); + ++OtherMulIdx) { + const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]); + // If MulOp occurs in OtherMul, we can fold the two multiplies + // together. + for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); + OMulOp != e; ++OMulOp) + if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { + // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul1 = getMulExpr(MulOps); + } + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); + if (OtherMul->getNumOperands() != 2) { + SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), + OtherMul->op_begin()+OMulOp); + MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); + InnerMul2 = getMulExpr(MulOps); + } + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + if (Ops.size() == 2) return OuterMul; + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+OtherMulIdx-1); + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + } + } + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this add and add them to the vector if + // they are loop invariant w.r.t. the recurrence. + SmallVector<const SCEV *, 8> LIOps; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (isLoopInvariant(Ops[i], AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} + LIOps.push_back(AddRec->getStart()); + + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + AddRecOps[0] = getAddExpr(LIOps); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + // Always propagate NW. + Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); + + // If all of the other operands were loop invariant, we are done. 
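+ // (The invariant operands were already erased from Ops above, so a single
+ // remaining element means the AddRec itself is all that is left.)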
+ if (Ops.size() == 1) return NewRec; + + // Otherwise, add the folded AddRec by the non-liv parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getAddExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // added together. If so, we can fold them. + for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> + SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; + } + AddRecOps[i] = getAddExpr(AddRecOps[i], + OtherAddRec->getOperand(i)); + } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + // Step size has changed, so we cannot guarantee no self-wraparound. + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); + return getAddExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an add expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAddExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVAddExpr *S = + static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + +/// getMulExpr - Get a canonical multiply expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags) { + assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && + "only nuw or nsw allowed"); + assert(!Ops.empty() && "Cannot get empty mul!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVMulExpr operand types don't match!"); +#endif + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); + } + + // Sort by complexity, this groups all similar expression types together. 
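+ // As in getAddExpr, this also moves any constants to the front of the list,
+ // which the constant-folding code below relies on.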
+ GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + + // C1*(C2+V) -> C1*C2 + C1*V + if (Ops.size() == 2) + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) + if (Add->getNumOperands() == 2 && + isa<SCEVConstant>(Add->getOperand(0))) + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), + getMulExpr(LHSC, Add->getOperand(1))); + + ++Idx; + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + LHSC->getValue()->getValue() * + RHSC->getValue()->getValue()); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant one being multiplied, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { + // If we have a multiply of zero, it will always be zero. + return Ops[0]; + } else if (Ops[0]->isAllOnesValue()) { + // If we have a mul by -1 of an add, try distributing the -1 among the + // add operands. + if (Ops.size() == 2) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { + SmallVector<const SCEV *, 4> NewOps; + bool AnyFolded = false; + for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), + E = Add->op_end(); I != E; ++I) { + const SCEV *Mul = getMulExpr(Ops[0], *I); + if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; + NewOps.push_back(Mul); + } + if (AnyFolded) + return getAddExpr(NewOps); + } + else if (const SCEVAddRecExpr * + AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { + // Negation preserves a recurrence's no self-wrap property. + SmallVector<const SCEV *, 4> Operands; + for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), + E = AddRec->op_end(); I != E; ++I) { + Operands.push_back(getMulExpr(Ops[0], *I)); + } + return getAddRecExpr(Operands, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + } + } + } + + if (Ops.size() == 1) + return Ops[0]; + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // If there are mul operands inline them all into this expression. + if (Idx < Ops.size()) { + bool DeletedMul = false; + while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { + // If we have an mul, expand the mul operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Mul->op_begin(), Mul->op_end()); + DeletedMul = true; + } + + // If we deleted at least one mul, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedMul) + return getMulExpr(Ops); + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this mul and add them to the vector if + // they are loop invariant w.r.t. the recurrence. 
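+ // For example, in x * y * {a,+,b}<L> where only x is defined outside L, x
+ // is collected here and folded into the recurrence as {x*a,+,x*b}<L> below.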
+ SmallVector<const SCEV *, 8> LIOps; + const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (isLoopInvariant(Ops[i], AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} + SmallVector<const SCEV *, 4> NewOps; + NewOps.reserve(AddRec->getNumOperands()); + const SCEV *Scale = getMulExpr(LIOps); + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer mul and the inner addrec are guaranteed to have no overflow. + // + // No self-wrap cannot be guaranteed after changing the step size, but + // will be inferred if either NUW or NSW is true. + Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); + + // If all of the other operands were loop invariant, we are done. + if (Ops.size() == 1) return NewRec; + + // Otherwise, multiply the folded AddRec by the non-liv parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getMulExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // multiplied together. If so, we can fold them. + for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { + // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> --> + // {A*C,+,F*D + G*B + B*D}<L> + for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), + getMulExpr(G, B), + getMulExpr(B, D)); + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, + F->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + return getMulExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an mul expr. Check to see if we + // already have one, otherwise create a new one. 
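+  // EDITOR'S NOTE (illustrative, not part of the upstream file): the uniquing
+  // below keys a FoldingSet node on (scMulExpr, operand pointers). Because the
+  // operands were put into a canonical order above, repeated requests for the
+  // same product hand back the same SCEVMulExpr*, which is why clients can
+  // compare SCEV expressions by pointer equality.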
+ FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVMulExpr *S = + static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + +/// getUDivExpr - Get a canonical unsigned division expression, or something +/// simpler if possible. +const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, + const SCEV *RHS) { + assert(getEffectiveSCEVType(LHS->getType()) == + getEffectiveSCEVType(RHS->getType()) && + "SCEVUDivExpr operand types don't match!"); + + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (RHSC->getValue()->equalsInt(1)) + return LHS; // X udiv 1 --> x + // If the denominator is zero, the result of the udiv is undefined. Don't + // try to analyze it, because the resolution chosen here may differ from + // the resolution chosen in other parts of the compiler. + if (!RHSC->getValue()->isZero()) { + // Determine if the division can be folded into the operands of + // its operands. + // TODO: Generalize this to non-constants by using known-bits information. + const Type *Ty = LHS->getType(); + unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; + // For non-power-of-two values, effectively round the value up to the + // nearest power of two. + if (!RHSC->getValue()->getValue().isPowerOf2()) + ++MaxShiftAmt; + const IntegerType *ExtTy = + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (const SCEVConstant *Step = + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) + if (!Step->getValue()->getValue() + .urem(RHSC->getValue()->getValue()) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop(), SCEV::FlagAnyWrap)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) + Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); + return getAddRecExpr(Operands, AR->getLoop(), + SCEV::FlagNW); + } + // (A*B)/C --> A*(B/C) if safe and B/C can be folded. + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) + // Find an operand that's safely divisible. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); + if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { + Operands = SmallVector<const SCEV *, 4>(M->op_begin(), + M->op_end()); + Operands[i] = Div; + return getMulExpr(Operands); + } + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. 
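+  // EDITOR'S NOTE (worked example, not part of the upstream file): for
+  // (8*%x + 24) /u 8, assuming the zero-extension check below succeeds (the
+  // add cannot carry into the widened bits), 8*%x /u 8 folds to %x, 24 /u 8
+  // folds to 3, and multiplying each quotient back by 8 recovers the original
+  // operand, so the whole expression becomes %x + 3. Something like
+  // (8*%x + 4) /u 8 is left as a udiv because 4 /u 8 fails the round-trip check.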
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { + SmallVector<const SCEV *, 4> Operands; + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { + Operands.clear(); + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); + if (isa<SCEVUDivExpr>(Op) || + getMulExpr(Op, RHS) != A->getOperand(i)) + break; + Operands.push_back(Op); + } + if (Operands.size() == A->getNumOperands()) + return getAddExpr(Operands); + } + } + + // Fold if both operands are constant. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + Constant *LHSCV = LHSC->getValue(); + Constant *RHSCV = RHSC->getValue(); + return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, + RHSCV))); + } + } + } + + FoldingSetNodeID ID; + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), + LHS, RHS); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, + const Loop *L, + SCEV::NoWrapFlags Flags) { + SmallVector<const SCEV *, 4> Operands; + Operands.push_back(Start); + if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) + if (StepChrec->getLoop() == L) { + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); + return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); + } + + Operands.push_back(Step); + return getAddRecExpr(Operands, L, Flags); +} + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. +const SCEV * +ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags) { + if (Operands.size() == 1) return Operands[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && + "SCEVAddRecExpr operand types don't match!"); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + assert(isLoopInvariant(Operands[i], L) && + "SCEVAddRecExpr operand is not loop-invariant!"); +#endif + + if (Operands.back()->isZero()) { + Operands.pop_back(); + return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X + } + + // It's tempting to want to call getMaxBackedgeTakenCount count here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. 
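+  // EDITOR'S NOTE (illustrative, not part of the upstream file): one direction
+  // of this inference, for an i8 recurrence whose operands are all known
+  // non-negative (i.e. below 128): if it already carries <nsw>, its value can
+  // never reach 128, so it cannot wrap the unsigned range either and <nuw>
+  // may be added as well.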
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { + bool All = true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(), + E = Operands.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); + } + + // Canonicalize nested AddRecs in by nesting them in order of loop depth. + if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { + const Loop *NestedLoop = NestedAR->getLoop(); + if (L->contains(NestedLoop) ? + (L->getLoopDepth() < NestedLoop->getLoopDepth()) : + (!NestedLoop->contains(L) && + DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); + Operands[0] = NestedAR->getStart(); + // AddRecs require their operands be loop-invariant with respect to their + // loops. Don't perform this transformation if it would break this + // requirement. + bool AllInvariant = true; + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (!isLoopInvariant(Operands[i], L)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + // Create a recurrence for the outer loop with the same step size. + // + // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the + // inner recurrence has the same property. + SCEV::NoWrapFlags OuterFlags = + maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); + + NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); + AllInvariant = true; + for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) + if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + // Ok, both add recurrences are valid after the transformation. + // + // The inner recurrence keeps its NW flag but only keeps NUW/NSW if + // the outer recurrence has the same property. + SCEV::NoWrapFlags InnerFlags = + maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); + return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); + } + } + // Reset Operands to its original state. + Operands[0] = NestedAR; + } + } + + // Okay, it looks like we really DO need an addrec expr. Check to see if we + // already have one, otherwise create a new one. 
+ FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); + void *IP = 0; + SCEVAddRecExpr *S = + static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getSMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty smax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVSMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::smax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) { + // If we have an smax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first SMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedSMax = false; + while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); + DeletedSMax = true; + } + + if (DeletedSMax) + return getSMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. 
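+  // EDITOR'S NOTE (illustrative, not part of the upstream file): after
+  // sorting, duplicates sit next to each other, so smax(%x, %x, %y) shrinks to
+  // smax(%x, %y) in one pass; and when isKnownPredicate can prove one operand
+  // is always >=s its neighbour (say %y = %x + 1 with <nsw>), the smaller
+  // operand is dropped and the expression may collapse entirely.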
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced smax down to nothing!"); + + // Okay, it looks like we really DO need an smax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scSMaxExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector<const SCEV *, 2> Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getUMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { + assert(!Ops.empty() && "Cannot get empty umax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVUMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::umax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast<SCEVConstant>(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) { + // If we have an umax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first UMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) + ++Idx; + + // Check to see if one of the operands is a UMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedUMax = false; + while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); + DeletedUMax = true; + } + + if (DeletedUMax) + return getUMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. 
+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced umax down to nothing!"); + + // Okay, it looks like we really DO need a umax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scUMaxExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~smax(~x, ~y) == smin(x, y). + return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, + const SCEV *RHS) { + // ~umax(~x, ~y) == umin(x, y) + return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); +} + +const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. + if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getTypeAllocSize(AllocTy)); + + Constant *C = ConstantExpr::getSizeOf(AllocTy); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { + Constant *C = ConstantExpr::getAlignOf(AllocTy); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, + unsigned FieldNo) { + // If we have TargetData, we can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. 
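+  // EDITOR'S NOTE (illustrative, not part of the upstream file): with
+  // TargetData describing a typical 64-bit layout, the offset of field 2 in
+  // { i32, i32, i64 } is read straight out of the StructLayout as 8; without
+  // TargetData the same value is recovered by building
+  // ConstantExpr::getOffsetOf and letting the constant folder reduce it.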
+ if (TD) + return getConstant(TD->getIntPtrType(getContext()), + TD->getStructLayout(STy)->getElementOffset(FieldNo)); + + Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy, + Constant *FieldNo) { + Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + return getTruncateOrZeroExtend(getSCEV(C), Ty); +} + +const SCEV *ScalarEvolution::getUnknown(Value *V) { + // Don't attempt to do anything other than create a SCEVUnknown object + // here. createSCEV only calls getUnknown after checking for all other + // interesting possibilities, and any other code that calls getUnknown + // is doing so in order to hide a value from SCEV canonicalization. + + FoldingSetNodeID ID; + ID.AddInteger(scUnknown); + ID.AddPointer(V); + void *IP = 0; + if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { + assert(cast<SCEVUnknown>(S)->getValue() == V && + "Stale SCEVUnknown in uniquing map!"); + return S; + } + SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, + FirstUnknown); + FirstUnknown = cast<SCEVUnknown>(S); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +//===----------------------------------------------------------------------===// +// Basic SCEV Analysis and PHI Idiom Recognition Code +// + +/// isSCEVable - Test if values of the given type are analyzable within +/// the SCEV framework. This primarily includes integer types, and it +/// can optionally include pointer types if the ScalarEvolution class +/// has access to target-specific information. +bool ScalarEvolution::isSCEVable(const Type *Ty) const { + // Integers and pointers are always SCEVable. + return Ty->isIntegerTy() || Ty->isPointerTy(); +} + +/// getTypeSizeInBits - Return the size in bits of the specified type, +/// for which isSCEVable must return true. +uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { + assert(isSCEVable(Ty) && "Type is not SCEVable!"); + + // If we have a TargetData, use it! + if (TD) + return TD->getTypeSizeInBits(Ty); + + // Integer types have fixed sizes. + if (Ty->isIntegerTy()) + return Ty->getPrimitiveSizeInBits(); + + // The only other support type is pointer. Without TargetData, conservatively + // assume pointers are 64-bit. + assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); + return 64; +} + +/// getEffectiveSCEVType - Return a type with the same bitwidth as +/// the given type and which represents how SCEV will treat the given +/// type, for which isSCEVable must return true. For pointer types, +/// this is the pointer-sized integer type. +const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { + assert(isSCEVable(Ty) && "Type is not SCEVable!"); + + if (Ty->isIntegerTy()) + return Ty; + + // The only other support type is pointer. + assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); + if (TD) return TD->getIntPtrType(getContext()); + + // Without TargetData, conservatively assume pointers are 64-bit. 
+ return Type::getInt64Ty(getContext()); +} + +const SCEV *ScalarEvolution::getCouldNotCompute() { + return &CouldNotCompute; +} + +/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the +/// expression and create a new one. +const SCEV *ScalarEvolution::getSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + + ValueExprMapType::const_iterator I = ValueExprMap.find(V); + if (I != ValueExprMap.end()) return I->second; + const SCEV *S = createSCEV(V); + + // The process of creating a SCEV for V may have caused other SCEVs + // to have been created, so it's necessary to insert the new entry + // from scratch, rather than trying to remember the insert position + // above. + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + return S; +} + +/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V +/// +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { + if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); + + const Type *Ty = V->getType(); + Ty = getEffectiveSCEVType(Ty); + return getMulExpr(V, + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); +} + +/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V +const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { + if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) + return getConstant( + cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); + + const Type *Ty = V->getType(); + Ty = getEffectiveSCEVType(Ty); + const SCEV *AllOnes = + getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); + return getMinusSCEV(AllOnes, V); +} + +/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. +const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags) { + assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); + + // Fast path: X - X --> 0. + if (LHS == RHS) + return getConstant(LHS->getType(), 0); + + // X - Y --> X + -Y + return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); +} + +/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is zero +/// extended. +const SCEV * +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) + return getTruncateExpr(V, Ty); + return getZeroExtendExpr(V, Ty); +} + +/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. 
+const SCEV * +ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, + const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or zero extend with non-integer arguments!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) + return getTruncateExpr(V, Ty); + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is zero +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or zero extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrZeroExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getZeroExtendExpr(V, Ty); +} + +/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or sign extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrSignExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of +/// the input value to the specified type. If the type must be extended, +/// it is extended with unspecified bits. The conversion must not be +/// narrowing. +const SCEV * +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or any extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrAnyExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getAnyExtendExpr(V, Ty); +} + +/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. The conversion must not be widening. +const SCEV * +ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or noop with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && + "getTruncateOrNoop cannot extend!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getTruncateExpr(V, Ty); +} + +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. 
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + +/// getUMinFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umin operation +/// with them. +const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMinExpr(PromotedLHS, PromotedRHS); +} + +/// getPointerBase - Transitively follow the chain of pointer-type operands +/// until reaching a SCEV that does not have a single pointer operand. This +/// returns a SCEVUnknown pointer for well-formed pointer-type expressions, +/// but corner cases do exist. +const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { + // A pointer operand may evaluate to a nonpointer expression, such as null. + if (!V->getType()->isPointerTy()) + return V; + + if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) { + return getPointerBase(Cast->getOperand()); + } + else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { + const SCEV *PtrOp = 0; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + if ((*I)->getType()->isPointerTy()) { + // Cannot find the base of an expression with multiple pointer operands. + if (PtrOp) + return V; + PtrOp = *I; + } + } + if (!PtrOp) + return V; + return getPointerBase(PtrOp); + } + return V; +} + +/// PushDefUseChildren - Push users of the given Instruction +/// onto the given Worklist. +static void +PushDefUseChildren(Instruction *I, + SmallVectorImpl<Instruction *> &Worklist) { + // Push the def-use children onto the Worklist stack. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Worklist.push_back(cast<Instruction>(*UI)); +} + +/// ForgetSymbolicValue - This looks up computed SCEV values for all +/// instructions that depend on the given instruction and removes them from +/// the ValueExprMapType map if they reference SymName. This is used during PHI +/// resolution. +void +ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { + SmallVector<Instruction *, 16> Worklist; + PushDefUseChildren(PN, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + Visited.insert(PN); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // Short-circuit the def-use traversal if the symbolic name + // ceases to appear in expressions. + if (Old != SymName && !hasOperand(Old, SymName)) + continue; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, it's a PHI that's in the progress of being computed + // by createNodeForPHI, or it's a single-value PHI. In the first case, + // additional loop trip count information isn't going to change anything. 
+ // In the second case, createNodeForPHI will perform the necessary + // updates on its own when it gets to that point. In the third, we do + // want to forget the SCEVUnknown. + if (!isa<PHINode>(I) || + !isa<SCEVUnknown>(Old) || + (I != PN && Old == SymName)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); + } + } + + PushDefUseChildren(I, Worklist); + } +} + +/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in +/// a loop header, making it a potential recurrence, or it doesn't. +/// +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const Loop *L = LI->getLoopFor(PN->getParent())) + if (L->getHeader() == PN->getParent()) { + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = 0, *StartValueV = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = 0; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = 0; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector<const SCEV *, 8> Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa<SCEVAddRecExpr>(Accum) && + cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. + if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } else if (const GEPOperator *GEP = + dyn_cast<GEPOperator>(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. 
We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. + if (GEP->isInBounds()) + Flags = setFlags(Flags, SCEV::FlagNW); + } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), + Accum, L, Flags); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } else if (const SCEVAddRecExpr *AddRec = + dyn_cast<SCEVAddRecExpr>(BEValue)) { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. + if (AddRec->getLoop() == L && AddRec->isAffine()) { + const SCEV *StartVal = getSCEV(StartValueV); + + // If StartVal = j.start - j.stride, we can use StartVal as the + // initial step of the addrec evolution. + if (StartVal == getMinusSCEV(AddRec->getOperand(0), + AddRec->getOperand(1))) { + // FIXME: For constant StartVal, we should be able to infer + // no-wrap flags. + const SCEV *PHISCEV = + getAddRecExpr(StartVal, AddRec->getOperand(1), L, + SCEV::FlagAnyWrap); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } + } + } + + // If the PHI has a single incoming value, follow that value, unless the + // PHI's incoming blocks are in a different loop, in which case doing so + // risks breaking LCSSA form. Instcombine would normally zap these, but + // it doesn't have DominatorTree information, so it may miss cases. + if (Value *V = SimplifyInstruction(PN, TD, DT)) + if (LI->replacementPreservesLCSSAForm(PN, V)) + return getSCEV(V); + + // If it's not a loop phi, we can't handle it yet. + return getUnknown(PN); +} + +/// createNodeForGEP - Expand GEP instructions into add and multiply +/// operations. This allows them to be analyzed by regular SCEV code. +/// +const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + + // Don't blindly transfer the inbounds flag from the GEP instruction to the + // Add expression, because the Instruction may be guarded by control flow + // and the no-overflow bits may not be valid for the expression in any + // context. + bool isInBounds = GEP->isInBounds(); + + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. 
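+  // EDITOR'S NOTE (worked example, not part of the upstream file): for a sized
+  // object the loop below turns, e.g.,
+  //   getelementptr inbounds { i64, i32 }* %p, i64 %i, i32 1
+  // into base(%p) + 16 * %i + 8 in SCEV terms: the array index is scaled by
+  // the element size (and sign-extended to pointer width when narrower), the
+  // struct index contributes a constant field offset, and <nsw> is placed on
+  // the multiply and the final add because the GEP is inbounds.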
+ if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) + return getUnknown(GEP); + const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + gep_type_iterator GTI = gep_type_begin(GEP); + for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), + E = GEP->op_end(); + I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); + const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); + } else { + // For an array, add the element offset, explicitly scaled. + const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *IndexS = getSCEV(Index); + // Getelementptr indices are signed. + IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, + isInBounds ? SCEV::FlagNSW : + SCEV::FlagAnyWrap); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); + } + } + + // Get the SCEV for the GEP base. + const SCEV *BaseS = getSCEV(Base); + + // Add the total offset from all the GEP indices to the base. + return getAddExpr(BaseS, TotalOffset, + isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); +} + +/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is +/// guaranteed to end in (at every loop iteration). It is, at the same time, +/// the minimum number of times S is divisible by 2. For example, given {4,+,8} +/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. +uint32_t +ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return C->getValue()->getValue().countTrailingZeros(); + + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) + return std::min(GetMinTrailingZeros(T->getOperand()), + (uint32_t)getTypeSizeInBits(T->getType())); + + if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { + uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); + return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ? + getTypeSizeInBits(E->getType()) : OpRes; + } + + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + // The result is the sum of all operands results. 
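+  // EDITOR'S NOTE (worked example, not part of the upstream file): for
+  // 24 * %x, the constant contributes 3 known zero bits and %x contributes
+  // however many ValueTracking can prove for it, so the product is known to
+  // end in at least 3 zero bits, clamped to the bit width below.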
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); + uint32_t BitWidth = getTypeSizeInBits(M->getType()); + for (unsigned i = 1, e = M->getNumOperands(); + SumOpRes != BitWidth && i != e; ++i) + SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), + BitWidth); + return SumOpRes; + } + + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); + for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); + return MinOpRes; + } + + if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { + // The result is the min of all operands results. + uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); + for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) + MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); + return MinOpRes; + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + unsigned BitWidth = getTypeSizeInBits(U->getType()); + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + return Zeros.countTrailingOnes(); + } + + // SCEVUDivExpr + return 0; +} + +/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getUnsignedRange(const SCEV *S) { + // See if we've computed this range already. + DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S); + if (I != UnsignedRanges.end()) + return I->second; + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return setUnsignedRange(C, ConstantRange(C->getValue()->getValue())); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum unsigned value will have those + // known zeros as well. 
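+  // EDITOR'S NOTE (worked example, not part of the upstream file): for an i8
+  // expression with three known trailing zero bits (a multiple of 8), the code
+  // below narrows the starting range to [0, 248], 248 being the largest
+  // multiple of 8 an i8 can hold; every later intersection only tightens this.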
+ uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getMinValue(BitWidth), + APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getUnsignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getUnsignedRange(Add->getOperand(i))); + return setUnsignedRange(Add, ConservativeResult.intersectWith(X)); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getUnsignedRange(Mul->getOperand(i))); + return setUnsignedRange(Mul, ConservativeResult.intersectWith(X)); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getUnsignedRange(SMax->getOperand(i))); + return setUnsignedRange(SMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getUnsignedRange(UMax->getOperand(i))); + return setUnsignedRange(UMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getUnsignedRange(UDiv->getLHS()); + ConstantRange Y = getUnsignedRange(UDiv->getRHS()); + return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(ZExt->getOperand()); + return setUnsignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getUnsignedRange(SExt->getOperand()); + return setUnsignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getUnsignedRange(Trunc->getOperand()); + return setUnsignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no unsigned wrap, the value will never be less than its + // initial value. + if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart())) + if (!C->getValue()->isZero()) + ConservativeResult = + ConservativeResult.intersectWith( + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. 
This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return setUnsignedRange(AddRec, ConservativeResult); + + APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), + EndRange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), + EndRange.getUnsignedMax()); + if (Min.isMinValue() && Max.isMaxValue()) + return setUnsignedRange(AddRec, ConservativeResult); + return setUnsignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); + } + } + + return setUnsignedRange(AddRec, ConservativeResult); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + APInt Mask = APInt::getAllOnesValue(BitWidth); + APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); + ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + if (Ones == ~Zeros + 1) + return setUnsignedRange(U, ConservativeResult); + return setUnsignedRange(U, + ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1))); + } + + return setUnsignedRange(S, ConservativeResult); +} + +/// getSignedRange - Determine the signed range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getSignedRange(const SCEV *S) { + // See if we've computed this range already. + DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S); + if (I != SignedRanges.end()) + return I->second; + + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) + return setSignedRange(C, ConstantRange(C->getValue()->getValue())); + + unsigned BitWidth = getTypeSizeInBits(S->getType()); + ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); + + // If the value has known zeros, the maximum signed value will have those + // known zeros as well. 
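+  // EDITOR'S NOTE (worked example, not part of the upstream file): the signed
+  // analogue of the unsigned case above: an i8 value with three known trailing
+  // zero bits is confined to [-128, 120], 120 being the largest multiple of 8
+  // that still fits a positive i8.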
+ uint32_t TZ = GetMinTrailingZeros(S); + if (TZ != 0) + ConservativeResult = + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + ConstantRange X = getSignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getSignedRange(Add->getOperand(i))); + return setSignedRange(Add, ConservativeResult.intersectWith(X)); + } + + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + ConstantRange X = getSignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getSignedRange(Mul->getOperand(i))); + return setSignedRange(Mul, ConservativeResult.intersectWith(X)); + } + + if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { + ConstantRange X = getSignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getSignedRange(SMax->getOperand(i))); + return setSignedRange(SMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { + ConstantRange X = getSignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getSignedRange(UMax->getOperand(i))); + return setSignedRange(UMax, ConservativeResult.intersectWith(X)); + } + + if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { + ConstantRange X = getSignedRange(UDiv->getLHS()); + ConstantRange Y = getSignedRange(UDiv->getRHS()); + return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y))); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) { + ConstantRange X = getSignedRange(ZExt->getOperand()); + return setSignedRange(ZExt, + ConservativeResult.intersectWith(X.zeroExtend(BitWidth))); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { + ConstantRange X = getSignedRange(SExt->getOperand()); + return setSignedRange(SExt, + ConservativeResult.intersectWith(X.signExtend(BitWidth))); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { + ConstantRange X = getSignedRange(Trunc->getOperand()); + return setSignedRange(Trunc, + ConservativeResult.intersectWith(X.truncate(BitWidth))); + } + + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { + // If there's no signed wrap, and all the operands have the same sign or + // zero, the value won't ever change sign. 
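+  // EDITOR'S NOTE (illustrative, not part of the upstream file): e.g. an i32
+  // recurrence {1,+,2}<nsw>, whose operands are all non-negative, gets its
+  // range intersected with [0, INT32_MAX]; dually, {0,+,-1}<nsw>, whose
+  // operands are all non-positive, is pinned to [INT32_MIN, 0].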
+ if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) { + bool AllNonNeg = true; + bool AllNonPos = true; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false; + if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false; + } + if (AllNonNeg) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt(BitWidth, 0), + APInt::getSignedMinValue(BitWidth))); + else if (AllNonPos) + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt(BitWidth, 1))); + } + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (!isa<SCEVCouldNotCompute>(MaxBECount) && + getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + ConstantRange StartRange = getSignedRange(Start); + ConstantRange StepRange = getSignedRange(Step); + ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount); + ConstantRange EndRange = + StartRange.add(MaxBECountRange.multiply(StepRange)); + + // Check for overflow. This must be done with ConstantRange arithmetic + // because we could be called from within the ScalarEvolution overflow + // checking code. + ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1); + ConstantRange ExtMaxBECountRange = + MaxBECountRange.zextOrTrunc(BitWidth*2+1); + ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1); + if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) != + ExtEndRange) + return setSignedRange(AddRec, ConservativeResult); + + APInt Min = APIntOps::smin(StartRange.getSignedMin(), + EndRange.getSignedMin()); + APInt Max = APIntOps::smax(StartRange.getSignedMax(), + EndRange.getSignedMax()); + if (Min.isMinSignedValue() && Max.isMaxSignedValue()) + return setSignedRange(AddRec, ConservativeResult); + return setSignedRange(AddRec, + ConservativeResult.intersectWith(ConstantRange(Min, Max+1))); + } + } + + return setSignedRange(AddRec, ConservativeResult); + } + + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // For a SCEVUnknown, ask ValueTracking. + if (!U->getValue()->getType()->isIntegerTy() && !TD) + return setSignedRange(U, ConservativeResult); + unsigned NS = ComputeNumSignBits(U->getValue(), TD); + if (NS == 1) + return setSignedRange(U, ConservativeResult); + return setSignedRange(U, ConservativeResult.intersectWith( + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1))); + } + + return setSignedRange(S, ConservativeResult); +} + +/// createSCEV - We know that there is no SCEV for the specified value. +/// Analyze the expression. +/// +const SCEV *ScalarEvolution::createSCEV(Value *V) { + if (!isSCEVable(V->getType())) + return getUnknown(V); + + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast<Instruction>(V)) { + Opcode = I->getOpcode(); + + // Don't attempt to analyze instructions in blocks that aren't + // reachable. Such instructions don't matter, and they aren't required + // to obey basic rules for definitions dominating uses which this + // analysis depends on. 
+ if (!DT->isReachableFromEntry(I->getParent())) + return getUnknown(V); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + Opcode = CE->getOpcode(); + else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return getConstant(CI); + else if (isa<ConstantPointerNull>(V)) + return getConstant(V->getType(), 0); + else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) + return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); + else + return getUnknown(V); + + Operator *U = cast<Operator>(V); + switch (Opcode) { + case Instruction::Add: { + // The simple thing to do would be to just call getSCEV on both operands + // and call getAddExpr with the result. However if we're looking at a + // bunch of things all added together, this can be quite inefficient, + // because it leads to N-1 getAddExpr calls for N ultimate operands. + // Instead, gather up all the operands and make a single getAddExpr call. + // LLVM IR canonical form means we need only traverse the left operands. + SmallVector<const SCEV *, 4> AddOps; + AddOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { + unsigned Opcode = Op->getValueID() - Value::InstructionVal; + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + break; + U = cast<Operator>(Op); + const SCEV *Op1 = getSCEV(U->getOperand(1)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getNegativeSCEV(Op1)); + else + AddOps.push_back(Op1); + } + AddOps.push_back(getSCEV(U->getOperand(0))); + return getAddExpr(AddOps); + } + case Instruction::Mul: { + // See the Add code above. + SmallVector<const SCEV *, 4> MulOps; + MulOps.push_back(getSCEV(U->getOperand(1))); + for (Value *Op = U->getOperand(0); + Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op = U->getOperand(0)) { + U = cast<Operator>(Op); + MulOps.push_back(getSCEV(U->getOperand(1))); + } + MulOps.push_back(getSCEV(U->getOperand(0))); + return getMulExpr(MulOps); + } + case Instruction::UDiv: + return getUDivExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::Sub: + return getMinusSCEV(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + case Instruction::And: + // For an expression like x&255 that merely masks off the high bits, + // use zext(trunc(x)) as the SCEV expression. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (CI->isNullValue()) + return getSCEV(U->getOperand(1)); + if (CI->isAllOnesValue()) + return getSCEV(U->getOperand(0)); + const APInt &A = CI->getValue(); + + // Instcombine's ShrinkDemandedConstant may strip bits out of + // constants, obscuring what would otherwise be a low-bits mask. + // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // knew about to reconstruct a low-bits mask value. + unsigned LZ = A.countLeadingZeros(); + unsigned BitWidth = A.getBitWidth(); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + + APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); + + if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) + return + getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), + IntegerType::get(getContext(), BitWidth - LZ)), + U->getType()); + } + break; + + case Instruction::Or: + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. 
Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); + const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } + } + break; + case Instruction::Xor: + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + // If the RHS of the xor is a signbit, then this is just an add. + // Instcombine turns add of signbit into xor as a strength reduction step. + if (CI->getValue().isSignBit()) + return getAddExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1))); + + // If the RHS of xor is -1, then this is a not operation. + if (CI->isAllOnesValue()) + return getNotSCEV(getSCEV(U->getOperand(0))); + + // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. + // This is a variant of the check for xor with -1, and it handles + // the case where instcombine has trimmed non-demanded bits out + // of an xor with -1. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) + if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO->getOpcode() == Instruction::And && + LCI->getValue() == CI->getValue()) + if (const SCEVZeroExtendExpr *Z = + dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { + const Type *UTy = U->getType(); + const SCEV *Z0 = Z->getOperand(); + const Type *Z0Ty = Z0->getType(); + unsigned Z0TySize = getTypeSizeInBits(Z0Ty); + + // If C is a low-bits mask, the zero extend is serving to + // mask off the high bits. Complement the operand and + // re-apply the zext. + if (APIntOps::isMask(Z0TySize, CI->getValue())) + return getZeroExtendExpr(getNotSCEV(Z0), UTy); + + // If C is a single bit, it may be in the sign-bit position + // before the zero-extend. In this case, represent the xor + // using an add, which is equivalent, and re-apply the zext. + APInt Trunc = CI->getValue().trunc(Z0TySize); + if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && + Trunc.isSignBit()) + return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), + UTy); + } + } + break; + + case Instruction::Shl: + // Turn shift left of a constant amount into a multiply. + if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; + + Constant *X = ConstantInt::get(getContext(), + APInt(BitWidth, 1).shl(SA->getZExtValue())); + return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + } + break; + + case Instruction::LShr: + // Turn logical shift right of a constant into a unsigned divide. 
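+ // For example, (X lshr 3) is modeled as the SCEV expression (X /u 8).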
+ if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) { + uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth(); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (SA->getValue().uge(BitWidth)) + break; + + Constant *X = ConstantInt::get(getContext(), + APInt(BitWidth, 1).shl(SA->getZExtValue())); + return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + } + break; + + case Instruction::AShr: + // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. + if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) + if (Operator *L = dyn_cast<Operator>(U->getOperand(0))) + if (L->getOpcode() == Instruction::Shl && + L->getOperand(1) == U->getOperand(1)) { + uint64_t BitWidth = getTypeSizeInBits(U->getType()); + + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (CI->getValue().uge(BitWidth)) + break; + + uint64_t Amt = BitWidth - CI->getZExtValue(); + if (Amt == BitWidth) + return getSCEV(L->getOperand(0)); // shift by zero --> noop + return + getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), + IntegerType::get(getContext(), + Amt)), + U->getType()); + } + break; + + case Instruction::Trunc: + return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::ZExt: + return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::SExt: + return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + + case Instruction::BitCast: + // BitCasts are no-op casts so we just eliminate the cast. + if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) + return getSCEV(U->getOperand(0)); + break; + + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. + + case Instruction::GetElementPtr: + return createNodeForGEP(cast<GEPOperator>(U)); + + case Instruction::PHI: + return createNodeForPHI(cast<PHINode>(U)); + + case Instruction::Select: + // This could be a smax or umax that was lowered earlier. + // Try to recover it. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (LHS->getType() == U->getType()) { + const SCEV *LS = getSCEV(LHS); + const SCEV *RS = getSCEV(RHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (LHS->getType() == U->getType() && + isa<ConstantInt>(RHS) && + cast<ConstantInt>(RHS)->isZero()) { + const SCEV *One = getConstant(LHS->getType(), 1); + const SCEV *LS = getSCEV(LHS); + const SCEV *LA = getSCEV(U->getOperand(1)); + const SCEV *RA = getSCEV(U->getOperand(2)); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + } + + default: // We cannot analyze this expression. + break; + } + + return getUnknown(V); +} + + + +//===----------------------------------------------------------------------===// +// Iteration Count Computation Code +// + +/// getBackedgeTakenCount - If the specified loop has a predictable +/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute +/// object. The backedge-taken count is the number of times the loop header +/// will be branched to from within the loop. This is one less than the +/// trip count of the loop, since it doesn't count the first iteration, +/// when the header is branched to from outside the loop. +/// +/// Note that it is not valid to call this method on a loop without a +/// loop-invariant backedge-taken count (see +/// hasLoopInvariantBackedgeTakenCount). +/// +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { + return getBackedgeTakenInfo(L).Exact; +} + +/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except +/// return the least SCEV value that is known never to be less than the +/// actual backedge taken count. 
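+/// In other words, it returns a conservative upper bound on the backedge-taken +/// count, which may be known even when the exact count is not computable.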
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { + return getBackedgeTakenInfo(L).Max; +} + +/// PushLoopPHIs - Push PHI nodes in the header of the given loop +/// onto the given Worklist. +static void +PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { + BasicBlock *Header = L->getHeader(); + + // Push all Loop-header PHIs onto the Worklist stack. + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + Worklist.push_back(PN); +} + +const ScalarEvolution::BackedgeTakenInfo & +ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { + // Initially insert a CouldNotCompute for this loop. If the insertion + // succeeds, proceed to actually compute a backedge-taken count and + // update the value. The temporary CouldNotCompute value tells SCEV + // code elsewhere that it shouldn't attempt to request a new + // backedge-taken count, which could result in infinite recursion. + std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = + BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); + if (!Pair.second) + return Pair.first->second; + + BackedgeTakenInfo Result = getCouldNotCompute(); + BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); + if (Computed.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(Computed.Exact, L) && + isLoopInvariant(Computed.Max, L) && + "Computed backedge-taken count isn't loop invariant for loop!"); + ++NumTripCountsComputed; + + // Update the value in the map. + Result = Computed; + } else { + if (Computed.Max != getCouldNotCompute()) + // Update the value in the map. + Result = Computed; + if (isa<PHINode>(L->getHeader()->begin())) + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; + } + + // Now that we know more about the trip count for this loop, forget any + // existing SCEV values for PHI nodes in this loop since they are only + // conservative estimates made without the benefit of trip count + // information. This is similar to the code in forgetLoop, except that + // it handles SCEVUnknown PHI nodes specially. + if (Computed.hasAnyInfo()) { + SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the progress of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the later + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); + } + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + } + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recusive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. 
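+ // (An insertion can grow and rehash the DenseMap, moving entries, so re-find + // the slot here rather than reusing Pair.first.)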
+ return BackedgeTakenCounts.find(L)->second = Result; +} + +/// forgetLoop - This method should be called by the client when it has +/// changed a loop in a way that may effect ScalarEvolution's ability to +/// compute a trip count, or if the loop is deleted. +void ScalarEvolution::forgetLoop(const Loop *L) { + // Drop any stored trip count value. + BackedgeTakenCounts.erase(L); + + // Drop information about expressions based on loop-header PHIs. + SmallVector<Instruction *, 16> Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + forgetMemoizedResults(It->second); + ValueExprMap.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + + // Forget all contained loops too, to avoid dangling entries in the + // ValuesAtScopes map. + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + forgetLoop(*I); +} + +/// forgetValue - This method should be called by the client when it has +/// changed a value in a way that may effect its value, or which may +/// disconnect it from a def-use chain linking it to a loop. +void ScalarEvolution::forgetValue(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return; + + // Drop information about expressions based on loop-header PHIs. + SmallVector<Instruction *, 16> Worklist; + Worklist.push_back(I); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I)); + if (It != ValueExprMap.end()) { + forgetMemoizedResults(It->second); + ValueExprMap.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } +} + +/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// of the specified loop will execute. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Examine all exits and pick the most conservative values. + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + bool CouldNotComputeBECount = false; + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BackedgeTakenInfo NewBTI = + ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); + + if (NewBTI.Exact == getCouldNotCompute()) { + // We couldn't compute an exact value for this exit, so + // we won't be able to compute an exact value for the loop. + CouldNotComputeBECount = true; + BECount = getCouldNotCompute(); + } else if (!CouldNotComputeBECount) { + if (BECount == getCouldNotCompute()) + BECount = NewBTI.Exact; + else + BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); + } + if (MaxBECount == getCouldNotCompute()) + MaxBECount = NewBTI.Max; + else if (NewBTI.Max != getCouldNotCompute()) + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); + } + + return BackedgeTakenInfo(BECount, MaxBECount); +} + +/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge +/// of the specified loop will execute if it exits via the specified block. 
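+/// The caller, ComputeBackedgeTakenCount, combines the per-exit results, taking +/// the unsigned minimum of the exact counts when every exit is computable.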
+ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, + BasicBlock *ExitingBlock) { + + // Okay, we've chosen an exiting block. See what condition causes us to + // exit at this block. + // + // FIXME: we should be able to handle switch instructions (with a single exit) + BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (ExitBr == 0) return getCouldNotCompute(); + assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!"); + + // At this point, we know we have a conditional branch that determines whether + // the loop is exited. However, we don't know if the branch is executed each + // time through the loop. If not, then the execution count of the branch will + // not be equal to the trip count of the loop. + // + // Currently we check for this by checking to see if the Exit branch goes to + // the loop header. If so, we know it will always execute the same number of + // times as the loop. We also handle the case where the exit block *is* the + // loop header. This is common for un-rotated loops. + // + // If both of those tests fail, walk up the unique predecessor chain to the + // header, stopping if there is an edge that doesn't exit the loop. If the + // header is reached, the execution count of the branch will be equal to the + // trip count of the loop. + // + // More extensive analysis could be done to handle more cases here. + // + if (ExitBr->getSuccessor(0) != L->getHeader() && + ExitBr->getSuccessor(1) != L->getHeader() && + ExitBr->getParent() != L->getHeader()) { + // The simple checks failed, try climbing the unique predecessor chain + // up to the header. + bool Ok = false; + for (BasicBlock *BB = ExitBr->getParent(); BB; ) { + BasicBlock *Pred = BB->getUniquePredecessor(); + if (!Pred) + return getCouldNotCompute(); + TerminatorInst *PredTerm = Pred->getTerminator(); + for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { + BasicBlock *PredSucc = PredTerm->getSuccessor(i); + if (PredSucc == BB) + continue; + // If the predecessor has a successor that isn't BB and isn't + // outside the loop, assume the worst. + if (L->contains(PredSucc)) + return getCouldNotCompute(); + } + if (Pred == L->getHeader()) { + Ok = true; + break; + } + BB = Pred; + } + if (!Ok) + return getCouldNotCompute(); + } + + // Proceed to the next level to examine the exit condition expression. + return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); +} + +/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of ExitCond, TBB, and FBB. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + // Check if the controlling expression for this loop is an And or Or. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { + if (BO->getOpcode() == Instruction::And) { + // Recurse on the operands of the and. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (L->contains(TBB)) { + // Both conditions must be true for the loop to continue executing. 
+ // Choose the less conservative count. + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max == getCouldNotCompute()) + MaxBECount = BTI1.Max; + else if (BTI1.Max == getCouldNotCompute()) + MaxBECount = BTI0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + } else { + // Both conditions must be true at the same time for the loop to exit. + // For now, be conservative. + assert(L->contains(FBB) && "Loop block has no successor in loop!"); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + if (BO->getOpcode() == Instruction::Or) { + // Recurse on the operands of the or. + BackedgeTakenInfo BTI0 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); + BackedgeTakenInfo BTI1 = + ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); + if (L->contains(FBB)) { + // Both conditions must be false for the loop to continue executing. + // Choose the less conservative count. + if (BTI0.Exact == getCouldNotCompute() || + BTI1.Exact == getCouldNotCompute()) + BECount = getCouldNotCompute(); + else + BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); + if (BTI0.Max == getCouldNotCompute()) + MaxBECount = BTI1.Max; + else if (BTI1.Max == getCouldNotCompute()) + MaxBECount = BTI0.Max; + else + MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + } else { + // Both conditions must be false at the same time for the loop to exit. + // For now, be conservative. + assert(L->contains(TBB) && "Loop block has no successor in loop!"); + if (BTI0.Max == BTI1.Max) + MaxBECount = BTI0.Max; + if (BTI0.Exact == BTI1.Exact) + BECount = BTI0.Exact; + } + + return BackedgeTakenInfo(BECount, MaxBECount); + } + } + + // With an icmp, it may be feasible to compute an exact backedge-taken count. + // Proceed to the next level to examine the icmp. + if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) + return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + + // Check for a constant condition. These are normally stripped out by + // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to + // preserve the CFG and is temporarily leaving constant conditions + // in place. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { + if (L->contains(FBB) == !CI->getZExtValue()) + // The backedge is always taken. + return getCouldNotCompute(); + else + // The backedge is never taken. + return getConstant(CI->getType(), 0); + } + + // If it's not an integer or pointer comparison then compute it the hard way. + return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// backedge of the specified loop will execute if its exit condition +/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. 
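+/// For example, the exit test of 'while (X != Y)' is analyzed by converting it +/// to X-Y != 0 and asking HowFarToZero about the difference.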
+ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { + + // If the condition was exit on true, convert the condition to exit on false + ICmpInst::Predicate Cond; + if (!L->contains(FBB)) + Cond = ExitCond->getPredicate(); + else + Cond = ExitCond->getInversePredicate(); + + // Handle common loops like: for (X = "string"; *X; ++X) + if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) + if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { + BackedgeTakenInfo ItCnt = + ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); + if (ItCnt.hasAnyInfo()) + return ItCnt; + } + + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); + + // Try to evaluate any dependencies out of the loop. + LHS = getSCEVAtScope(LHS, L); + RHS = getSCEVAtScope(RHS, L); + + // At this point, we would like to compute how many iterations of the + // loop the predicate will return true for these inputs. + if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { + // If there is a loop-invariant, force it into the RHS. + std::swap(LHS, RHS); + Cond = ICmpInst::getSwappedPredicate(Cond); + } + + // Simplify the operands before analyzing them. + (void)SimplifyICmpOperands(Cond, LHS, RHS); + + // If we have a comparison of a chrec against a constant, try to use value + // ranges to answer this query. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) + if (AddRec->getLoop() == L) { + // Form the constant range. + ConstantRange CompRange( + ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + + const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); + if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; + } + + switch (Cond) { + case ICmpInst::ICMP_NE: { // while (X != Y) + // Convert to: while (X-Y != 0) + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_EQ: { // while (X == Y) + // Convert to: while (X-Y == 0) + BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_SLT: { + BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_SGT: { + BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + getNotSCEV(RHS), L, true); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_ULT: { + BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false); + if (BTI.hasAnyInfo()) return BTI; + break; + } + case ICmpInst::ICMP_UGT: { + BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + getNotSCEV(RHS), L, false); + if (BTI.hasAnyInfo()) return BTI; + break; + } + default: +#if 0 + dbgs() << "ComputeBackedgeTakenCount "; + if (ExitCond->getOperand(0)->getType()->isUnsigned()) + dbgs() << "[unsigned] "; + dbgs() << *LHS << " " + << Instruction::getOpcodeName(Instruction::ICmp) + << " " << *RHS << "\n"; +#endif + break; + } + return + ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); +} + +static ConstantInt * +EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, + ScalarEvolution &SE) { + const SCEV *InVal = SE.getConstant(C); + const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); + assert(isa<SCEVConstant>(Val) && 
+ "Evaluation of SCEV at constant didn't fold correctly?"); + return cast<SCEVConstant>(Val)->getValue(); +} + +/// GetAddressedElementFromGlobal - Given a global variable with an initializer +/// and a GEP expression (missing the pointer index) indexing into it, return +/// the addressed element of the initializer or null if the index expression is +/// invalid. +static Constant * +GetAddressedElementFromGlobal(GlobalVariable *GV, + const std::vector<ConstantInt*> &Indices) { + Constant *Init = GV->getInitializer(); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) { + uint64_t Idx = Indices[i]->getZExtValue(); + if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) { + assert(Idx < CS->getNumOperands() && "Bad struct index!"); + Init = cast<Constant>(CS->getOperand(Idx)); + } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) { + if (Idx >= CA->getNumOperands()) return 0; // Bogus program + Init = cast<Constant>(CA->getOperand(Idx)); + } else if (isa<ConstantAggregateZero>(Init)) { + if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + assert(Idx < STy->getNumElements() && "Bad struct index!"); + Init = Constant::getNullValue(STy->getElementType(Idx)); + } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { + if (Idx >= ATy->getNumElements()) return 0; // Bogus program + Init = Constant::getNullValue(ATy->getElementType()); + } else { + llvm_unreachable("Unknown constant aggregate type!"); + } + return 0; + } else { + return 0; // Unknown initializer type + } + } + return Init; +} + +/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of +/// 'icmp op load X, cst', try to see if we can compute the backedge +/// execution count. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( + LoadInst *LI, + Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + if (LI->isVolatile()) return getCouldNotCompute(); + + // Check to see if the loaded pointer is a getelementptr of a global. + // TODO: Use SCEV instead of manually grubbing with GEPs. + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); + if (!GEP) return getCouldNotCompute(); + + // Make sure that it is really a constant global we are gepping, with an + // initializer, and make sure the first IDX is really 0. + GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || + !cast<Constant>(GEP->getOperand(1))->isNullValue()) + return getCouldNotCompute(); + + // Okay, we allow one non-constant index into the GEP instruction. + Value *VarIdx = 0; + std::vector<ConstantInt*> Indexes; + unsigned VarIdxNum = 0; + for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { + Indexes.push_back(CI); + } else if (!isa<ConstantInt>(GEP->getOperand(i))) { + if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. + VarIdx = GEP->getOperand(i); + VarIdxNum = i-2; + Indexes.push_back(0); + } + + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. + // Check to see if X is a loop variant variable value now. + const SCEV *Idx = getSCEV(VarIdx); + Idx = getSCEVAtScope(Idx, L); + + // We can only recognize very limited forms of loop index expressions, in + // particular, only affine AddRec's like {C1,+,C2}. 
+ const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); + if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || + !isa<SCEVConstant>(IdxExpr->getOperand(0)) || + !isa<SCEVConstant>(IdxExpr->getOperand(1))) + return getCouldNotCompute(); + + unsigned MaxSteps = MaxBruteForceIterations; + for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { + ConstantInt *ItCst = ConstantInt::get( + cast<IntegerType>(IdxExpr->getType()), IterationNum); + ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); + + // Form the GEP offset. + Indexes[VarIdxNum] = Val; + + Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + if (Result == 0) break; // Cannot compute! + + // Evaluate the condition for this iteration. + Result = ConstantExpr::getICmp(predicate, Result, RHS); + if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure + if (cast<ConstantInt>(Result)->getValue().isMinValue()) { +#if 0 + dbgs() << "\n***\n*** Computed loop count " << *ItCst + << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() + << "***\n"; +#endif + ++NumArrayLenItCounts; + return getConstant(ItCst); // Found terminating iteration! + } + } + return getCouldNotCompute(); +} + + +/// CanConstantFold - Return true if we can constant fold an instruction of the +/// specified type, assuming that all operands were constants. +static bool CanConstantFold(const Instruction *I) { + if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || + isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I)) + return true; + + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) + return canConstantFoldCallTo(F); + return false; +} + +/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node +/// in the loop that V is derived from. We allow arbitrary operations along the +/// way, but the operands of an operation must either be constants or a value +/// derived from a constant PHI. If this expression does not fit with these +/// constraints, return null. +static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { + // If this is not an instruction, or if this is an instruction outside of the + // loop, it can't be derived from a loop PHI. + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || !L->contains(I)) return 0; + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (L->getHeader() == I->getParent()) + return PN; + else + // We don't currently keep track of the control flow needed to evaluate + // PHIs, so we cannot handle PHIs inside of loops. + return 0; + } + + // If we won't be able to constant fold this expression even if the operands + // are constants, return early. + if (!CanConstantFold(I)) return 0; + + // Otherwise, we can evaluate this instruction if all of its operands are + // constant or derived from a PHI node themselves. + PHINode *PHI = 0; + for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) + if (!isa<Constant>(I->getOperand(Op))) { + PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); + if (P == 0) return 0; // Not evolving from PHI + if (PHI == 0) + PHI = P; + else if (PHI != P) + return 0; // Evolving from multiple different PHIs. + } + + // This is a expression evolving from a constant PHI! + return PHI; +} + +/// EvaluateExpression - Given an expression that passes the +/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node +/// in the loop has the value PHIVal. 
If we can't fold this expression for some +/// reason, return null. +static Constant *EvaluateExpression(Value *V, Constant *PHIVal, + const TargetData *TD) { + if (isa<PHINode>(V)) return PHIVal; + if (Constant *C = dyn_cast<Constant>(V)) return C; + Instruction *I = cast<Instruction>(V); + + std::vector<Constant*> Operands(I->getNumOperands()); + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); + if (Operands[i] == 0) return 0; + } + + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], + Operands[1], TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); +} + +/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is +/// in the header of its containing loop, we know the loop executes a +/// constant number of times, and the PHI node is just a recurrence +/// involving constants, fold it. +Constant * +ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, + const APInt &BEs, + const Loop *L) { + DenseMap<PHINode*, Constant*>::const_iterator I = + ConstantEvolutionLoopExitValue.find(PN); + if (I != ConstantEvolutionLoopExitValue.end()) + return I->second; + + if (BEs.ugt(MaxBruteForceIterations)) + return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it. + + Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; + + // Since the loop is canonicalized, the PHI node must have two entries. One + // entry must be a constant (coming in from outside of the loop), and the + // second must be derived from the same PHI. + bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); + Constant *StartCST = + dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) + return RetVal = 0; // Must be a constant. + + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa<Constant>(BEValue)) + return RetVal = 0; // Not derived from same PHI. + + // Execute the loop symbolically to determine the exit value. + if (BEs.getActiveBits() >= 32) + return RetVal = 0; // More than 2^32-1 iterations?? Not doing it! + + unsigned NumIterations = BEs.getZExtValue(); // must be in range + unsigned IterationNum = 0; + for (Constant *PHIVal = StartCST; ; ++IterationNum) { + if (IterationNum == NumIterations) + return RetVal = PHIVal; // Got exit value! + + // Compute the value of the PHI node for the next iteration. + Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); + if (NextPHI == PHIVal) + return RetVal = NextPHI; // Stopped evolving! + if (NextPHI == 0) + return 0; // Couldn't evaluate! + PHIVal = NextPHI; + } +} + +/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a +/// constant number of times (the condition evolves only from constants), +/// try to evaluate a few iterations of the loop until we get the exit +/// condition gets a value of ExitWhen (true or false). If we cannot +/// evaluate the trip count of the loop, return getCouldNotCompute(). +const SCEV * +ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { + PHINode *PN = getConstantEvolvingPHI(Cond, L); + if (PN == 0) return getCouldNotCompute(); + + // If the loop is canonicalized, the PHI will have exactly two entries. + // That's the only form we support here. 
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + + // One entry must be a constant (coming in from outside of the loop), and the + // second must be derived from the same PHI. + bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); + Constant *StartCST = + dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. + + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa<Constant>(BEValue)) + return getCouldNotCompute(); // Not derived from same PHI. + + // Okay, we find a PHI node that defines the trip count of this loop. Execute + // the loop symbolically to determine when the condition gets a value of + // "ExitWhen". + unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (Constant *PHIVal = StartCST; + IterationNum != MaxIterations; ++IterationNum) { + ConstantInt *CondVal = + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD)); + + // Couldn't symbolically evaluate. + if (!CondVal) return getCouldNotCompute(); + + if (CondVal->getValue() == uint64_t(ExitWhen)) { + ++NumBruteForceTripCountsComputed; + return getConstant(Type::getInt32Ty(getContext()), IterationNum); + } + + // Compute the value of the PHI node for the next iteration. + Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); + if (NextPHI == 0 || NextPHI == PHIVal) + return getCouldNotCompute();// Couldn't evaluate or not making progress... + PHIVal = NextPHI; + } + + // Too many iterations were needed to evaluate. + return getCouldNotCompute(); +} + +/// getSCEVAtScope - Return a SCEV expression for the specified value +/// at the specified scope in the program. The L value specifies a loop +/// nest to evaluate the expression at, where null is the top-level or a +/// specified loop is immediately inside of the loop. +/// +/// This method can be used to compute the exit value for a variable defined +/// in a loop by querying what the value will hold in the parent loop. +/// +/// In the case that a relevant loop exit value cannot be computed, the +/// original value V is returned. +const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + // Check to see if we've folded this expression at this loop before. + std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V]; + std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair = + Values.insert(std::make_pair(L, static_cast<const SCEV *>(0))); + if (!Pair.second) + return Pair.first->second ? Pair.first->second : V; + + // Otherwise compute it. + const SCEV *C = computeSCEVAtScope(V, L); + ValuesAtScopes[V][L] = C; + return C; +} + +const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { + if (isa<SCEVConstant>(V)) return V; + + // If this instruction is evolved from a constant-evolving PHI, compute the + // exit value from the loop without using SCEVs. + if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) { + if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { + const Loop *LI = (*this->LI)[I->getParent()]; + if (LI && LI->getParentLoop() == L) // Looking for loop exit value. + if (PHINode *PN = dyn_cast<PHINode>(I)) + if (PN->getParent() == LI->getHeader()) { + // Okay, there is no closed form solution for the PHI node. Check + // to see if the loop that contains it has a known backedge-taken + // count. 
If so, we may be able to force computation of the exit + // value. + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); + if (const SCEVConstant *BTCC = + dyn_cast<SCEVConstant>(BackedgeTakenCount)) { + // Okay, we know how many times the containing loop executes. If + // this is a constant evolving PHI node, get the final value at + // the specified iteration number. + Constant *RV = getConstantEvolutionLoopExitValue(PN, + BTCC->getValue()->getValue(), + LI); + if (RV) return getSCEV(RV); + } + } + + // Okay, this is an expression that we cannot symbolically evaluate + // into a SCEV. Check to see if it's possible to symbolically evaluate + // the arguments into constants, and if so, try to constant propagate the + // result. This is particularly useful for computing loop exit values. + if (CanConstantFold(I)) { + SmallVector<Constant *, 4> Operands; + bool MadeImprovement = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *Op = I->getOperand(i); + if (Constant *C = dyn_cast<Constant>(Op)) { + Operands.push_back(C); + continue; + } + + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + const SCEV *OrigV = getSCEV(Op); + const SCEV *OpV = getSCEVAtScope(OrigV, L); + MadeImprovement |= OrigV != OpV; + + Constant *C = 0; + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) + C = SC->getValue(); + if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) + C = dyn_cast<Constant>(SU->getValue()); + if (!C) return V; + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); + } + + // Check to see if getSCEVAtScope actually made an improvement. + if (MadeImprovement) { + Constant *C = 0; + if (const CmpInst *CI = dyn_cast<CmpInst>(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + Operands[0], Operands[1], TD); + else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); + if (!C) return V; + return getSCEV(C); + } + } + } + + // This is some other type of SCEVUnknown, just return it. + return V; + } + + if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) { + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + if (OpAtScope != Comm->getOperand(i)) { + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(), + Comm->op_begin()+i); + NewOps.push_back(OpAtScope); + + for (++i; i != e; ++i) { + OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + NewOps.push_back(OpAtScope); + } + if (isa<SCEVAddExpr>(Comm)) + return getAddExpr(NewOps); + if (isa<SCEVMulExpr>(Comm)) + return getMulExpr(NewOps); + if (isa<SCEVSMaxExpr>(Comm)) + return getSMaxExpr(NewOps); + if (isa<SCEVUMaxExpr>(Comm)) + return getUMaxExpr(NewOps); + llvm_unreachable("Unknown commutative SCEV type!"); + } + } + // If we got here, all operands are loop invariant. 
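+ // None of the operands changed, so the original expression is already + // invariant at this scope and can be returned as-is.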
+ return Comm; + } + + if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); + if (LHS == Div->getLHS() && RHS == Div->getRHS()) + return Div; // must be loop invariant + return getUDivExpr(LHS, RHS); + } + + // If this is a loop recurrence for a loop that does not contain L, then we + // are dealing with the final value computed by the loop. + if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) { + // First, attempt to evaluate each operand. + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); + if (OpAtScope == AddRec->getOperand(i)) + continue; + + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(), + AddRec->op_begin()+i); + NewOps.push_back(OpAtScope); + for (++i; i != e; ++i) + NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); + + const SCEV *FoldedRec = + getAddRecExpr(NewOps, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; + break; + } + + // If the scope is outside the addrec's loop, evaluate it by using the + // loop exit value of the addrec. + if (!AddRec->getLoop()->contains(L)) { + // To evaluate this recurrence, we need to know how many times the AddRec + // loop iterates. Compute this now. + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; + + // Then, evaluate the AddRec. + return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); + } + + return AddRec; + } + + if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getZeroExtendExpr(Op, Cast->getType()); + } + + if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getSignExtendExpr(Op, Cast->getType()); + } + + if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getTruncateExpr(Op, Cast->getType()); + } + + llvm_unreachable("Unknown SCEV type!"); + return 0; +} + +/// getSCEVAtScope - This is a convenience function which does +/// getSCEVAtScope(getSCEV(V), L). +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { + return getSCEVAtScope(getSCEV(V), L); +} + +/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the +/// following equation: +/// +/// A * X = B (mod N) +/// +/// where N = 2^BW and BW is the common bit width of A and B. The signedness of +/// A and B isn't important. +/// +/// If the equation does not have a solution, SCEVCouldNotCompute is returned. 
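+/// +/// For example, with BW = 8, A = 4 and B = 12: D = gcd(4, 2^8) = 4 divides B, +/// and the minimum unsigned solution of 4*X == 12 (mod 256) is X = 3.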
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, + ScalarEvolution &SE) { + uint32_t BW = A.getBitWidth(); + assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(A != 0 && "A must be non-zero."); + + // 1. D = gcd(A, N) + // + // The gcd of A and N may have only one prime factor: 2. The number of + // trailing zeros in A is its multiplicity + uint32_t Mult2 = A.countTrailingZeros(); + // D = 2^Mult2 + + // 2. Check if B is divisible by D. + // + // B is divisible by D if and only if the multiplicity of prime factor 2 for B + // is not less than multiplicity of this prime factor for D. + if (B.countTrailingZeros() < Mult2) + return SE.getCouldNotCompute(); + + // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic + // modulo (N / D). + // + // (N / D) may need BW+1 bits in its representation. Hence, we'll use this + // bit width during computations. + APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D + APInt Mod(BW + 1, 0); + Mod.setBit(BW - Mult2); // Mod = N / D + APInt I = AD.multiplicativeInverse(Mod); + + // 4. Compute the minimum unsigned root of the equation: + // I * (B / D) mod (N / D) + APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + + // The result is guaranteed to be less than 2^BW so we may truncate it to BW + // bits. + return SE.getConstant(Result.trunc(BW)); +} + +/// SolveQuadraticEquation - Find the roots of the quadratic equation for the +/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which +/// might be the same) or two SCEVCouldNotCompute objects. +/// +static std::pair<const SCEV *,const SCEV *> +SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); + const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0)); + const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1)); + const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2)); + + // We currently can only solve this if the coefficients are constants. + if (!LC || !MC || !NC) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); + const APInt &L = LC->getValue()->getValue(); + const APInt &M = MC->getValue()->getValue(); + const APInt &N = NC->getValue()->getValue(); + APInt Two(BitWidth, 2); + APInt Four(BitWidth, 4); + + { + using namespace APIntOps; + const APInt& C = L; + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The B coefficient is M-N/2 + APInt B(M); + B -= sdiv(N,Two); + + // The A coefficient is N/2 + APInt A(N.sdiv(Two)); + + // Compute the B^2-4ac term. + APInt SqrtTerm(B); + SqrtTerm *= B; + SqrtTerm -= Four * (A * C); + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal(SqrtTerm.sqrt()); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. 
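+ // X = (-B +/- sqrt(B^2-4AC)) / 2A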
+ APInt NegB(-B); + APInt TwoA( A << 1 ); + if (TwoA.isMinValue()) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + + return std::make_pair(SE.getConstant(Solution1), + SE.getConstant(Solution2)); + } // end APIntOps namespace +} + +/// HowFarToZero - Return the number of times a backedge comparing the specified +/// value to zero will execute. If not computable, return CouldNotCompute. +/// +/// This is only used for loops with a "x != y" exit test. The exit condition is +/// now expressed as a single expression, V = x-y. So the exit test is +/// effectively V != 0. We know and take advantage of the fact that this +/// expression only being used in a comparison by zero context. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { + // If the value is a constant + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { + // If the value is already zero, the branch will execute zero times. + if (C->getValue()->isZero()) return C; + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. + if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(AddRec, *this); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + if (R1 && R2) { +#if 0 + dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 + << " sol#2: " << *R2 << "\n"; +#endif + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, + R1->getValue(), + R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); + if (Val->isZero()) + return R1; // We found a quadratic root! + } + } + return getCouldNotCompute(); + } + + // Otherwise we can only handle this if it is affine. + if (!AddRec->isAffine()) + return getCouldNotCompute(); + + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + + // For now we handle only constant steps. + // + // TODO: Handle a nonconstant Step given AddRec<NUW>. If the + // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap + // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. + // We have not yet seen any such cases. 
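+ // + // For example, given the no-wrap recurrence {4,+,-2}, Distance below is 4 and + // the backedge-taken count evaluates to 4 /u 2 = 2.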
+ const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); + if (StepC == 0) + return getCouldNotCompute(); + + // For positive steps (counting up until unsigned overflow): + // N = -Start/Step (as unsigned) + // For negative steps (counting down to zero): + // N = Start/-Step + // First compute the unsigned distance from zero in the direction of Step. + bool CountDown = StepC->getValue()->getValue().isNegative(); + const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); + + // Handle unitary steps, which cannot wraparound. + // 1*N = -Start; -1*N = Start (mod 2^BW), so: + // N = Distance (as unsigned) + if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) + return Distance; + + // If the recurrence is known not to wraparound, unsigned divide computes the + // back edge count. We know that the value will either become zero (and thus + // the loop terminates), that the loop will terminate through some other exit + // condition first, or that the loop has undefined behavior. This means + // we can't "miss" the exit value, even with nonunit stride. + // + // FIXME: Prove that loops always exhibits *acceptable* undefined + // behavior. Loops must exhibit defined behavior until a wrapped value is + // actually used. So the trip count computed by udiv could be smaller than the + // number of well-defined iterations. + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + // FIXME: We really want an "isexact" bit for udiv. + return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + + // Then, try to solve the above equation provided that Start is constant. + if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); + return getCouldNotCompute(); +} + +/// HowFarToNonZero - Return the number of times a backedge checking the +/// specified value for nonzero will execute. If not computable, return +/// CouldNotCompute +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { + // Loops that look like: while (X == 0) are very strange indeed. We don't + // handle them yet except for the trivial case. This could be expanded in the + // future as needed. + + // If the value is a constant, check to see if it is known to be non-zero + // already. If so, the backedge will execute zero times. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { + if (!C->getValue()->isNullValue()) + return getConstant(C->getType(), 0); + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + // We could implement others, but I really doubt anyone writes loops like + // this, and if they did, they would already be constant folded. + return getCouldNotCompute(); +} + +/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB +/// (which may not be an immediate predecessor) which has exactly one +/// successor from which BB is reachable, or null if no such block is +/// found. +/// +std::pair<BasicBlock *, BasicBlock *> +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { + // If the block has a unique predecessor, then there is no path from the + // predecessor to the block that does not go through the direct edge + // from the predecessor to the block. + if (BasicBlock *Pred = BB->getSinglePredecessor()) + return std::make_pair(Pred, BB); + + // A loop's header is defined to be a block that dominates the loop. 
+ // If the header has a unique predecessor outside the loop, it must be + // a block that has exactly one successor that can reach the loop. + if (Loop *L = LI->getLoopFor(BB)) + return std::make_pair(L->getLoopPredecessor(), L->getHeader()); + + return std::pair<BasicBlock *, BasicBlock *>(); +} + +/// HasSameValue - SCEV structural equivalence is usually sufficient for +/// testing whether two expressions are equal, however for the purposes of +/// looking for a condition guarding a loop, it can be useful to be a little +/// more general, since a front-end may have replicated the controlling +/// expression. +/// +static bool HasSameValue(const SCEV *A, const SCEV *B) { + // Quick check to see if they are the same SCEV. + if (A == B) return true; + + // Otherwise, if they're both SCEVUnknown, it's possible that they hold + // two different instructions with the same value. Check for this case. + if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) + if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) + if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) + if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) + if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + return true; + + // Otherwise assume they may have a different value. + return false; +} + +/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with +/// predicate Pred. Return true iff any changes were made. +/// +bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, + const SCEV *&LHS, const SCEV *&RHS) { + bool Changed = false; + + // Canonicalize a constant to the right side. + if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { + // Check for both operands constant. + if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { + if (ConstantExpr::getICmp(Pred, + LHSC->getValue(), + RHSC->getValue())->isNullValue()) + goto trivially_false; + else + goto trivially_true; + } + // Otherwise swap the operands to put the constant on the right. + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + + // If we're comparing an addrec with a value which is loop-invariant in the + // addrec's loop, put the addrec on the left. Also make a dominance check, + // as both operands could be addrecs loop-invariant in each other's loop. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { + const Loop *L = AR->getLoop(); + if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. 
+ if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. + if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. 
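// --- Editor's note: illustrative sketch, not part of this patch -------------
// The constant-RHS canonicalization above turns each *-or-equal predicate into
// a strict one and folds the boundary cases. For an unsigned 8-bit x, for
// example:
//   x u>= 5    becomes  x u> 4
//   x u>= 1    becomes  x != 0
//   x u>= 255  becomes  x == 255
//   x u>= 0    is trivially true
// The signed cases are analogous, with INT8_MIN/INT8_MAX as the boundaries.
// A quick exhaustive check of two of these rewrites:

#include <cassert>

int main() {
  for (unsigned x = 0; x <= 255; ++x) {
    assert((x >= 5) == (x > 4));
    assert((x >= 1) == (x != 0));
  }
  return 0;
}
// --- end editor's note -------------------------------------------------------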
+ switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + SCEV::FlagNSW); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + SCEV::FlagNUW); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + SCEV::FlagNUW); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + SCEV::FlagNUW); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + SCEV::FlagNUW); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_NE; + return true; +} + +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, AR->getStart(), RHS) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) + return true; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) + if (isLoopEntryGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getStart()) && + isLoopBackedgeGuardedByCond( + AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) + return true; + + // Otherwise see what can be done with known constant ranges. + return isKnownPredicateWithRanges(Pred, LHS, RHS); +} + +bool +ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + if (HasSameValue(LHS, RHS)) + return ICmpInst::isTrueWhenEqual(Pred); + + // This code is split out from isKnownPredicate because it is called from + // within isLoopEntryGuardedByCond. + switch (Pred) { + default: + llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + break; + case ICmpInst::ICMP_SGT: + Pred = ICmpInst::ICMP_SLT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_SGE: + Pred = ICmpInst::ICMP_SLE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGT: + Pred = ICmpInst::ICMP_ULT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULT: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGE: + Pred = ICmpInst::ICMP_ULE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULE: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_NE: { + if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet()) + return true; + if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet()) + return true; + + const SCEV *Diff = getMinusSCEV(LHS, RHS); + if (isKnownNonZero(Diff)) + return true; + break; + } + case ICmpInst::ICMP_EQ: + // The check at the top of the function catches the case where + // the values are known to be equal. + break; + } + return false; +} + +/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is +/// protected by a conditional between LHS and RHS. This is used to +/// to eliminate casts. +bool +ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). 
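// --- Editor's note: illustrative sketch, not part of this patch -------------
// The range-based fallback (isKnownPredicateWithRanges, above) proves or
// refutes a predicate purely from conservatively correct value ranges. For
// ICMP_SLT the test reduces to comparing interval endpoints, as in this
// standalone sketch (hypothetical names; closed intervals standing in for
// LLVM's ConstantRange):

#include <cassert>
#include <cstdint>

enum class Tri { True, False, Unknown };

// LHS is known to lie in [LMin, LMax] and RHS in [RMin, RMax] (signed).
Tri knownSignedLess(int64_t LMin, int64_t LMax, int64_t RMin, int64_t RMax) {
  if (LMax < RMin)  return Tri::True;      // every LHS value < every RHS value
  if (LMin >= RMax) return Tri::False;     // no LHS value is < any RHS value
  return Tri::Unknown;                     // the ranges overlap; nothing proven
}

int main() {
  assert(knownSignedLess(0, 9, 10, 20) == Tri::True);
  assert(knownSignedLess(10, 20, 0, 9) == Tri::False);
  assert(knownSignedLess(0, 15, 10, 20) == Tri::Unknown);
  return 0;
}
// --- end editor's note -------------------------------------------------------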
+ if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast<BranchInst>(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + return isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return false; + + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + for (std::pair<BasicBlock *, BasicBlock *> + Pair(L->getLoopPredecessor(), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + BranchInst *LoopEntryPredicate = + dyn_cast<BranchInst>(Pair.first->getTerminator()); + if (!LoopEntryPredicate || + LoopEntryPredicate->isUnconditional()) + continue; + + if (isImpliedCond(Pred, LHS, RHS, + LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) + return true; + } + + return false; +} + +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, + bool Inverse) { + // Recursively handle And and Or conditions. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) { + if (BO->getOpcode() == Instruction::And) { + if (!Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } else if (BO->getOpcode() == Instruction::Or) { + if (Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } + } + + ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue); + if (!ICI) return false; + + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them. This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + + // Now that we found a conditional branch that dominates the loop, check to + // see if it is the comparison we are looking for. + ICmpInst::Predicate FoundPred; + if (Inverse) + FoundPred = ICI->getInversePredicate(); + else + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. 
+ if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } + + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(Pred); + + // Check to see if we can make the LHS or RHS match. + if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa<SCEVConstant>(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + } + + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa<SCEVConstant>(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. + return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true. +bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true. 
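// --- Editor's note: illustrative sketch, not part of this patch -------------
// The "~x < ~y --> x > y" identity used by isImpliedCondOperands above follows
// from ~x == -1 - x in two's complement:
//   ~x <s ~y  <=>  -1 - x <s -1 - y  <=>  y <s x  <=>  x >s y
// and likewise for the unsigned and the -or-equal forms. A quick exhaustive
// check over the 8-bit signed values:

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y)
      assert(((int8_t)~(int8_t)x < (int8_t)~(int8_t)y) == (x > y));
  return 0;
}
// --- end editor's note -------------------------------------------------------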
+bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } + + return false; +} + +/// getBECount - Subtract the end and start values and divide by the step, +/// rounding up, to get the number of times the backedge is executed. Return +/// CouldNotCompute if an intermediate computation overflows. +const SCEV *ScalarEvolution::getBECount(const SCEV *Start, + const SCEV *End, + const SCEV *Step, + bool NoWrap) { + assert(!isKnownNegative(Step) && + "This code doesn't handle negative strides yet!"); + + const Type *Ty = Start->getType(); + + // When Start == End, we have an exact BECount == 0. Short-circuit this case + // here because SCEV may not be able to determine that the unsigned division + // after rounding is zero. + if (Start == End) + return getConstant(Ty, 0); + + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); + const SCEV *Diff = getMinusSCEV(End, Start); + const SCEV *RoundUp = getAddExpr(Step, NegOne); + + // Add an adjustment to the difference between End and Start so that + // the division will effectively round up. + const SCEV *Add = getAddExpr(Diff, RoundUp); + + if (!NoWrap) { + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. + const Type *WideTy = IntegerType::get(getContext(), + getTypeSizeInBits(Ty) + 1); + const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); + const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); + if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) + return getCouldNotCompute(); + } + + return getUDivExpr(Add, Step); +} + +/// HowManyLessThans - Return the number of times a backedge containing the +/// specified less-than comparison will execute. If not computable, return +/// CouldNotCompute. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool isSigned) { + // Only handle: "ADDREC < LoopInvariant". + if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); + + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // Check to see if we have a flag which makes analysis easy. + bool NoWrap = isSigned ? 
AddRec->getNoWrapFlags(SCEV::FlagNSW) : + AddRec->getNoWrapFlags(SCEV::FlagNUW); + + if (AddRec->isAffine()) { + unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + if (Step->isZero()) + return getCouldNotCompute(); + if (Step->isOne()) { + // With unit stride, the iteration never steps past the limit value. + } else if (isKnownPositive(Step)) { + // Test whether a positive iteration can step past the limit + // value and past the maximum value for its type in a single step. + // Note that it's not sufficient to check NoWrap here, because even + // though the value after a wrap is undefined, it's not undefined + // behavior, so if wrap does occur, the loop could either terminate or + // loop infinitely, but in either case, the loop is guaranteed to + // iterate at least until the iteration where the wrapping occurs. + const SCEV *One = getConstant(Step->getType(), 1); + if (isSigned) { + APInt Max = APInt::getSignedMaxValue(BitWidth); + if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) + .slt(getSignedRange(RHS).getSignedMax())) + return getCouldNotCompute(); + } else { + APInt Max = APInt::getMaxValue(BitWidth); + if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) + .ult(getUnsignedRange(RHS).getUnsignedMax())) + return getCouldNotCompute(); + } + } else + // TODO: Handle negative strides here and below. + return getCouldNotCompute(); + + // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant + // m. So, we count the number of iterations in which {n,+,s} < m is true. + // Note that we cannot simply return max(m-n,0)/s because it's not safe to + // treat m-n as signed nor unsigned due to overflow possibility. + + // First, we get the value of the LHS in the first iteration: n + const SCEV *Start = AddRec->getOperand(0); + + // Determine the minimum constant start value. + const SCEV *MinStart = getConstant(isSigned ? + getSignedRange(Start).getSignedMin() : + getUnsignedRange(Start).getUnsignedMin()); + + // If we know that the condition is true in order to enter the loop, + // then we know that it will run exactly (m-n)/s times. Otherwise, we + // only know that it will execute (max(m,n)-n)/s times. In both cases, + // the division must round up. + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, + getMinusSCEV(Start, Step), RHS)) + End = isSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + // Determine the maximum constant end value. + const SCEV *MaxEnd = getConstant(isSigned ? + getSignedRange(End).getSignedMax() : + getUnsignedRange(End).getUnsignedMax()); + + // If MaxEnd is within a step of the maximum integer value in its type, + // adjust it down to the minimum value which would produce the same effect. + // This allows the subsequent ceiling division of (N+(step-1))/step to + // compute the correct value. + const SCEV *StepMinusOne = getMinusSCEV(Step, + getConstant(Step->getType(), 1)); + MaxEnd = isSigned ? + getSMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), + StepMinusOne)) : + getUMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), + StepMinusOne)); + + // Finally, we subtract these two values and divide, rounding up, to get + // the number of times the backedge is executed. 
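// --- Editor's note: illustrative sketch, not part of this patch -------------
// The rounding-up division just described is getBECount's
//   BECount = (End - Start + (Step - 1)) /u Step,
// a ceiling division of the distance by the stride (with the addition checked
// for unsigned overflow in a BW+1-bit type when no nowrap flag is available).
// For example, Start = 0, End = 10, Step = 3 gives (10 + 2) / 3 = 4, i.e. the
// four iterations with values 0, 3, 6, 9 for which {0,+,3} < 10 holds. A
// standalone check of the arithmetic (hypothetical helper name):

#include <cassert>
#include <cstdint>

uint64_t ceilDivBECount(uint64_t Start, uint64_t End, uint64_t Step) {
  // Assumes End >= Start and that the sum does not wrap, i.e. the NoWrap fast
  // path of getBECount.
  return (End - Start + (Step - 1)) / Step;
}

int main() {
  assert(ceilDivBECount(0, 10, 3) == 4);
  assert(ceilDivBECount(5, 5, 7) == 0);   // Start == End => zero iterations
  return 0;
}
// --- end editor's note -------------------------------------------------------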
+ const SCEV *BECount = getBECount(Start, End, Step, NoWrap); + + // The maximum backedge count is similar, except using the minimum start + // value and the maximum end value. + // If we already have an exact constant BECount, use it instead. + const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount + : getBECount(MinStart, MaxEnd, Step, NoWrap); + + // If the stride is nonconstant, and NoWrap == true, then + // getBECount(MinStart, MaxEnd) may not compute. This would result in an + // exact BECount and invalid MaxBECount, which should be avoided to catch + // more optimization opportunities. + if (isa<SCEVCouldNotCompute>(MaxBECount)) + MaxBECount = BECount; + + return BackedgeTakenInfo(BECount, MaxBECount); + } + + return getCouldNotCompute(); +} + +/// getNumIterationsInRange - Return the number of iterations of this loop that +/// produce values in the specified constant range. Another way of looking at +/// this is that it returns the first iteration number where the value is not in +/// the condition, thus computing the exit count. If the iteration count can't +/// be computed, an instance of SCEVCouldNotCompute is returned. +const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, + ScalarEvolution &SE) const { + if (Range.isFullSet()) // Infinite loop. + return SE.getCouldNotCompute(); + + // If the start is a non-zero constant, shift the range to simplify things. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) + if (!SC->getValue()->isZero()) { + SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); + Operands[0] = SE.getConstant(SC->getType(), 0); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), + getNoWrapFlags(FlagNW)); + if (const SCEVAddRecExpr *ShiftedAddRec = + dyn_cast<SCEVAddRecExpr>(Shifted)) + return ShiftedAddRec->getNumIterationsInRange( + Range.subtract(SC->getValue()->getValue()), SE); + // This is strange and shouldn't happen. + return SE.getCouldNotCompute(); + } + + // The only time we can solve this is when we have all constant indices. + // Otherwise, we cannot determine the overflow conditions. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!isa<SCEVConstant>(getOperand(i))) + return SE.getCouldNotCompute(); + + + // Okay at this point we know that all elements of the chrec are constants and + // that the start element is zero. + + // First check to see if the range contains zero. If not, the first + // iteration exits. + unsigned BitWidth = SE.getTypeSizeInBits(getType()); + if (!Range.contains(APInt(BitWidth, 0))) + return SE.getConstant(getType(), 0); + + if (isAffine()) { + // If this is an affine expression then we have this situation: + // Solve {0,+,A} in Range === Ax in Range + + // We know that zero is in the range. If A is positive then we know that + // the upper value of the range must be the first possible exit value. + // If A is negative then the lower of the range is the last possible loop + // value. Also note that we already checked for a full range. + APInt One(BitWidth,1); + APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); + APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); + + // The exit value should be (End+A)/A. + APInt ExitVal = (End + A).udiv(A); + ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); + + // Evaluate at the exit value. If we really did fall out of the valid + // range, then we computed our trip count, otherwise wrap around or other + // things must have happened. 
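// --- Editor's note: illustrative sketch, not part of this patch -------------
// For the affine case of getNumIterationsInRange just computed: with
// Range = [0, 100) and a positive stride A = 7, End is the upper bound minus
// one (99) and
//   ExitVal = (End + A) /u A = 106 / 7 = 15.
// Iteration 15 produces 7 * 15 = 105, which is outside the range, while
// iteration 14 produces 98, which is still inside, so 15 is the first
// out-of-range iteration (this is exactly what the evaluation at ExitValue
// below re-checks). A standalone check of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t A = 7, Upper = 100;     // range is [0, Upper), stride A > 0
  uint64_t End = Upper - 1;              // largest value still inside the range
  uint64_t ExitVal = (End + A) / A;      // first iteration whose value exits
  assert(ExitVal == 15);
  assert(A * ExitVal >= Upper);          // iteration ExitVal is out of range
  assert(A * (ExitVal - 1) < Upper);     // the previous iteration is inside
  return 0;
}
// --- end editor's note -------------------------------------------------------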
+ ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); + if (Range.contains(Val->getValue())) + return SE.getCouldNotCompute(); // Something strange happened + + // Ensure that the previous value is in the range. This is a sanity check. + assert(Range.contains( + EvaluateConstantChrecAtConstant(this, + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && + "Linear scev computation is off in a bad way!"); + return SE.getConstant(ExitValue); + } else if (isQuadratic()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the + // quadratic equation to solve it. To do this, we must frame our problem in + // terms of figuring out when zero is crossed, instead of when + // Range.getUpper() is crossed. + SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end()); + NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), + // getNoWrapFlags(FlagNW) + FlagAnyWrap); + + // Next, solve the constructed addrec + std::pair<const SCEV *,const SCEV *> Roots = + SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); + const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first); + const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second); + if (R1) { + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. + ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); + + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened + } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened + } + } + } + + return SE.getCouldNotCompute(); +} + + + +//===----------------------------------------------------------------------===// +// SCEVCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void ScalarEvolution::SCEVCallbackVH::deleted() { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + if (PHINode *PN = dyn_cast<PHINode>(getValPtr())) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(getValPtr()); + // this now dangles! +} + +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + + // Forget all the expressions associated with users of the old value, + // so that future queries will recompute the expressions using the new + // value. 
+ Value *Old = getValPtr(); + SmallVector<User *, 16> Worklist; + SmallPtrSet<User *, 8> Visited; + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + while (!Worklist.empty()) { + User *U = Worklist.pop_back_val(); + // Deleting the Old value will cause this to dangle. Postpone + // that until everything else is done. + if (U == Old) + continue; + if (!Visited.insert(U)) + continue; + if (PHINode *PN = dyn_cast<PHINode>(U)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + } + // Delete the Old value. + if (PHINode *PN = dyn_cast<PHINode>(Old)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(Old); + // this now dangles! +} + +ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) + : CallbackVH(V), SE(se) {} + +//===----------------------------------------------------------------------===// +// ScalarEvolution Class Implementation +//===----------------------------------------------------------------------===// + +ScalarEvolution::ScalarEvolution() + : FunctionPass(ID), FirstUnknown(0) { + initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); +} + +bool ScalarEvolution::runOnFunction(Function &F) { + this->F = &F; + LI = &getAnalysis<LoopInfo>(); + TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); + return false; +} + +void ScalarEvolution::releaseMemory() { + // Iterate through all the SCEVUnknown instances and call their + // destructors, so that they release their references to their values. + for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) + U->~SCEVUnknown(); + FirstUnknown = 0; + + ValueExprMap.clear(); + BackedgeTakenCounts.clear(); + ConstantEvolutionLoopExitValue.clear(); + ValuesAtScopes.clear(); + LoopDispositions.clear(); + BlockDispositions.clear(); + UnsignedRanges.clear(); + SignedRanges.clear(); + UniqueSCEVs.clear(); + SCEVAllocator.Reset(); +} + +void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LoopInfo>(); + AU.addRequiredTransitive<DominatorTree>(); +} + +bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { + return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L)); +} + +static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, + const Loop *L) { + // Print all inner loops first + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + PrintLoopInfo(OS, SE, *I); + + OS << "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + SmallVector<BasicBlock *, 8> ExitBlocks; + L->getExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) + OS << "<multiple exits> "; + + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); + } else { + OS << "Unpredictable backedge-taken count. "; + } + + OS << "\n" + "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + } else { + OS << "Unpredictable max backedge-taken count. 
"; + } + + OS << "\n"; +} + +void ScalarEvolution::print(raw_ostream &OS, const Module *) const { + // ScalarEvolution's implementation of the print method is to print + // out SCEV values of all instructions that are interesting. Doing + // this potentially causes it to create new SCEV objects though, + // which technically conflicts with the const qualifier. This isn't + // observable from outside the class though, so casting away the + // const isn't dangerous. + ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); + + OS << "Classifying expressions for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { + OS << *I << '\n'; + OS << " --> "; + const SCEV *SV = SE.getSCEV(&*I); + SV->print(OS); + + const Loop *L = LI->getLoopFor((*I).getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + } + + if (L) { + OS << "\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!SE.isLoopInvariant(ExitValue, L)) { + OS << "<<Unknown>>"; + } else { + OS << *ExitValue; + } + } + + OS << "\n"; + } + + OS << "Determining loop execution counts for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + PrintLoopInfo(OS, &SE, *I); +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { + std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S]; + std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair = + Values.insert(std::make_pair(L, LoopVariant)); + if (!Pair.second) + return Pair.first->second; + + LoopDisposition D = computeLoopDisposition(S, L); + return LoopDispositions[S][L] = D; +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { + switch (S->getSCEVType()) { + case scConstant: + return LoopInvariant; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L); + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + + // If L is the addrec's loop, it's computable. + if (AR->getLoop() == L) + return LoopComputable; + + // Add recurrences are never invariant in the function-body (null loop). + if (!L) + return LoopVariant; + + // This recurrence is variant w.r.t. L if L contains AR's loop. + if (L->contains(AR->getLoop())) + return LoopVariant; + + // This recurrence is invariant w.r.t. L if AR's loop contains L. + if (AR->getLoop()->contains(L)) + return LoopInvariant; + + // This recurrence is variant w.r.t. L if any of its operands + // are variant. + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + if (!isLoopInvariant(*I, L)) + return LoopVariant; + + // Otherwise it's loop-invariant. + return LoopInvariant; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool HasVarying = false; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + LoopDisposition D = getLoopDisposition(*I, L); + if (D == LoopVariant) + return LoopVariant; + if (D == LoopComputable) + HasVarying = true; + } + return HasVarying ? 
LoopComputable : LoopInvariant; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); + if (LD == LoopVariant) + return LoopVariant; + LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); + if (RD == LoopVariant) + return LoopVariant; + return (LD == LoopInvariant && RD == LoopInvariant) ? + LoopInvariant : LoopComputable; + } + case scUnknown: + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". + if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) + return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; + return LoopInvariant; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return LoopVariant; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return LoopVariant; +} + +bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopInvariant; +} + +bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopComputable; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { + std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S]; + std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool> + Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock)); + if (!Pair.second) + return Pair.first->second; + + BlockDisposition D = computeBlockDisposition(S, BB); + return BlockDispositions[S][BB] = D; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { + switch (S->getSCEVType()) { + case scConstant: + return ProperlyDominatesBlock; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB); + case scAddRecExpr: { + // This uses a "dominates" query instead of "properly dominates" query + // to test for proper dominance too, because the instruction which + // produces the addrec's value is a PHI, and a PHI effectively properly + // dominates its entire containing block. + const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); + if (!DT->dominates(AR->getLoop()->getHeader(), BB)) + return DoesNotDominateBlock; + } + // FALL THROUGH into SCEVNAryExpr handling. + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + bool Proper = true; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + BlockDisposition D = getBlockDisposition(*I, BB); + if (D == DoesNotDominateBlock) + return DoesNotDominateBlock; + if (D == DominatesBlock) + Proper = false; + } + return Proper ? ProperlyDominatesBlock : DominatesBlock; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + BlockDisposition LD = getBlockDisposition(LHS, BB); + if (LD == DoesNotDominateBlock) + return DoesNotDominateBlock; + BlockDisposition RD = getBlockDisposition(RHS, BB); + if (RD == DoesNotDominateBlock) + return DoesNotDominateBlock; + return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? 
+ ProperlyDominatesBlock : DominatesBlock; + } + case scUnknown: + if (Instruction *I = + dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) { + if (I->getParent() == BB) + return DominatesBlock; + if (DT->properlyDominates(I->getParent(), BB)) + return ProperlyDominatesBlock; + return DoesNotDominateBlock; + } + return ProperlyDominatesBlock; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return DoesNotDominateBlock; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return DoesNotDominateBlock; +} + +bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) >= DominatesBlock; +} + +bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) == ProperlyDominatesBlock; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S); + const SCEV *CastOp = Cast->getOperand(); + return Op == CastOp || hasOperand(CastOp, Op); + } + case scAddRecExpr: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S); + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + const SCEV *NAryOp = *I; + if (NAryOp == Op || hasOperand(NAryOp, Op)) + return true; + } + return false; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + return LHS == Op || hasOperand(LHS, Op) || + RHS == Op || hasOperand(RHS, Op); + } + case scUnknown: + return false; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return false; +} + +void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { + ValuesAtScopes.erase(S); + LoopDispositions.erase(S); + BlockDispositions.erase(S); + UnsignedRanges.erase(S); + SignedRanges.erase(S); +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 0000000..e9edb3e --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,173 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. +// +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependencies between different iterations. +// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis + /// implementation that uses ScalarEvolution to answer queries. + class ScalarEvolutionAliasAnalysis : public FunctionPass, + public AliasAnalysis { + ScalarEvolution *SE; + + public: + static char ID; // Class identification, replacement for typeinfo + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + initializeScalarEvolutionAliasAnalysisPass( + *PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual AliasResult alias(const Location &LocA, const Location &LocB); + + Value *GetBaseValue(const SCEV *S); + }; +} // End of anonymous namespace + +// Register this pass... +char ScalarEvolutionAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true, false) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", + "ScalarEvolution-based Alias Analysis", false, true, false) + +FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { + return new ScalarEvolutionAliasAnalysis(); +} + +void +ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive<ScalarEvolution>(); + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +bool +ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) { + InitializeAliasAnalysis(this); + SE = &getAnalysis<ScalarEvolution>(); + return false; +} + +/// GetBaseValue - Given an expression, try to find a +/// base value. Return null is none was found. +Value * +ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // In an addrec, assume that the base will be in the start, rather + // than the step. + return GetBaseValue(AR->getStart()); + } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { + // If there's a pointer operand, it'll be sorted at the end of the list. + const SCEV *Last = A->getOperand(A->getNumOperands()-1); + if (Last->getType()->isPointerTy()) + return GetBaseValue(Last); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + // This is a leaf node. + return U->getValue(); + } + // No Identified object found. + return 0; +} + +AliasAnalysis::AliasResult +ScalarEvolutionAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. This allows the code below to ignore this special + // case. + if (LocA.Size == 0 || LocB.Size == 0) + return NoAlias; + + // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! 
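// --- Editor's note: illustrative sketch, not part of this patch -------------
// The core of the alias() query below is a range test on the pointer
// difference: with D = B - A computed in BW-bit unsigned arithmetic and known
// to lie in [DMin, DMax], the accesses [A, A+SizeA) and [B, B+SizeB) cannot
// overlap when SizeA <=u DMin and -SizeB >=u DMax (i.e. D is at least SizeA
// and at most 2^BW - SizeB). A standalone sketch of that test (hypothetical
// name; LLVM uses APInt and ConstantRange rather than raw 64-bit values):

#include <cassert>
#include <cstdint>

bool provablyDisjoint(uint64_t DMin, uint64_t DMax,
                      uint64_t SizeA, uint64_t SizeB) {
  // All arithmetic is modulo 2^64, matching the BW-bit wraparound semantics;
  // both sizes are assumed non-zero (the zero-size case is handled earlier).
  return SizeA <= DMin && (0 - SizeB) >= DMax;
}

int main() {
  // D = 16 exactly (DMin == DMax == 16): [0,8) and [16,24) cannot overlap.
  assert(provablyDisjoint(16, 16, 8, 8));
  // D = 4 exactly: [0,8) and [4,12) do overlap, so nothing is proven.
  assert(!provablyDisjoint(4, 4, 8, 8));
  return 0;
}
// --- end editor's note -------------------------------------------------------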
+ const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr)); + const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr)); + + // If they evaluate to the same expression, it's a MustAlias. + if (AS == BS) return MustAlias; + + // If something is known about the difference between the two addresses, + // see if it's enough to prove a NoAlias. + if (SE->getEffectiveSCEVType(AS->getType()) == + SE->getEffectiveSCEVType(BS->getType())) { + unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); + APInt ASizeInt(BitWidth, LocA.Size); + APInt BSizeInt(BitWidth, LocB.Size); + + // Compute the difference between the two pointers. + const SCEV *BA = SE->getMinusSCEV(BS, AS); + + // Test whether the difference is known to be great enough that memory of + // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt + // are non-zero, which is special-cased above. + if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) && + (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax())) + return NoAlias; + + // Folding the subtraction while preserving range information can be tricky + // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS + // and try again to see if things fold better that way. + + // Compute the difference between the two pointers. + const SCEV *AB = SE->getMinusSCEV(AS, BS); + + // Test whether the difference is known to be great enough that memory of + // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt + // are non-zero, which is special-cased above. + if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) && + (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax())) + return NoAlias; + } + + // If ScalarEvolution can find an underlying object, form a new query. + // The correctness of this depends on ScalarEvolution not recognizing + // inttoptr and ptrtoint operators. + Value *AO = GetBaseValue(AS); + Value *BO = GetBaseValue(BS); + if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) + if (alias(Location(AO ? AO : LocA.Ptr, + AO ? +UnknownSize : LocA.Size, + AO ? 0 : LocA.TBAATag), + Location(BO ? BO : LocB.Ptr, + BO ? +UnknownSize : LocB.Size, + BO ? 0 : LocB.TBAATag)) == NoAlias) + return NoAlias; + + // Forward the query to the next analysis. + return AliasAnalysis::alias(LocA, LocB); +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp new file mode 100644 index 0000000..befe6d2 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -0,0 +1,1403 @@ +//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution expander, +// which is used to generate the code corresponding to a given scalar evolution +// expression. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one exists, moving an existing +/// cast if a suitable one exists but isn't in the right place, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // Check to see if there is already a cast! + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (U->getType() == Ty) + if (CastInst *CI = dyn_cast<CastInst>(U)) + if (CI->getOpcode() == Op) { + // If the cast isn't where we want it, fix it. + if (BasicBlock::iterator(CI) != IP) { + // Create a new cast, and leave the old cast in place in case + // it is being used as an insert point. Clear its operand + // so that it doesn't hold anything live. + Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + CI->setOperand(0, UndefValue::get(V->getType())); + rememberInstruction(NewCI); + return NewCI; + } + rememberInstruction(CI); + return CI; + } + } + + // Create a new cast. + Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); + rememberInstruction(I); + return I; +} + +/// InsertNoopCastOfTo - Insert a cast of V to the specified type, +/// which must be possible with a noop cast, doing what we can to share +/// the casts. +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { + Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); + assert((Op == Instruction::BitCast || + Op == Instruction::PtrToInt || + Op == Instruction::IntToPtr) && + "InsertNoopCastOfTo cannot perform non-noop casts!"); + assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && + "InsertNoopCastOfTo cannot change sizes!"); + + // Short-circuit unnecessary bitcasts. + if (Op == Instruction::BitCast && V->getType() == Ty) + return V; + + // Short-circuit unnecessary inttoptr<->ptrtoint casts. + if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && + SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { + if (CastInst *CI = dyn_cast<CastInst>(V)) + if ((CI->getOpcode() == Instruction::PtrToInt || + CI->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CI->getType()) == + SE.getTypeSizeInBits(CI->getOperand(0)->getType())) + return CI->getOperand(0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if ((CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::IntToPtr) && + SE.getTypeSizeInBits(CE->getType()) == + SE.getTypeSizeInBits(CE->getOperand(0)->getType())) + return CE->getOperand(0); + } + + // Fold a cast of a constant. + if (Constant *C = dyn_cast<Constant>(V)) + return ConstantExpr::getCast(Op, C, Ty); + + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. 
+ if (Argument *A = dyn_cast<Argument>(V)) { + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa<BitCastInst>(IP) && + isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && + cast<BitCastInst>(IP)->getOperand(0) != A) || + isa<DbgInfoIntrinsic>(IP)) + ++IP; + return ReuseOrCreateCast(A, Ty, Op, IP); + } + + // Cast the instruction immediately after the instruction. + Instruction *I = cast<Instruction>(V); + BasicBlock::iterator IP = I; ++IP; + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + IP = II->getNormalDest()->begin(); + while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); +} + +/// InsertBinop - Insert the specified binary operator, doing a small amount +/// of work to avoid inserting an obviously redundant operation. +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS) { + // Fold a binop with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(LHS)) + if (Constant *CRHS = dyn_cast<Constant>(RHS)) + return ConstantExpr::get(Opcode, CLHS, CRHS); + + // Do a quick scan to see if we have this binop nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && + IP->getOperand(1) == RHS) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // If we haven't found this binop, insert it. + Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp")); + BO->setDebugLoc(SaveInsertPt->getDebugLoc()); + rememberInstruction(BO); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return BO; +} + +/// FactorOutConstant - Test if S is divisible by Factor, using signed +/// division. If so, update S with Factor divided out and return true. +/// S need not be evenly divisible if a reasonable remainder can be +/// computed. +/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made +/// unnecessary; in its place, just signed-divide Ops[i] by the scale and +/// check to see if the divide was folded. +static bool FactorOutConstant(const SCEV *&S, + const SCEV *&Remainder, + const SCEV *Factor, + ScalarEvolution &SE, + const TargetData *TD) { + // Everything is divisible by one. + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getConstant(S->getType(), 1); + return true; + } + + // For a Constant, check for a multiple of the given factor. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + // 0/x == 0. 
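The short backwards scan in InsertBinop above is a simple peephole: look at the last few instructions before the insertion point and reuse an identical binop if one is already there, rather than emitting a duplicate. A toy standalone sketch of the same idea over a flat list of operations (toy types, not the real IR classes):

#include <cassert>
#include <vector>

struct ToyBinOp { int Opcode; int LHS, RHS; };   // stand-ins for Value*

// Scan at most Limit entries backwards from InsertPt for an identical binop;
// return its index, or -1 so the caller knows it must create a new one.
static int findNearbyBinop(const std::vector<ToyBinOp> &Block, size_t InsertPt,
                           const ToyBinOp &Wanted, unsigned Limit = 6) {
  for (size_t i = InsertPt; i > 0 && Limit; --Limit) {
    --i;
    if (Block[i].Opcode == Wanted.Opcode &&
        Block[i].LHS == Wanted.LHS && Block[i].RHS == Wanted.RHS)
      return static_cast<int>(i);
  }
  return -1;
}

int main() {
  std::vector<ToyBinOp> Block;
  ToyBinOp Add = { 1, 10, 20 };
  Block.push_back(Add);
  assert(findNearbyBinop(Block, Block.size(), Add) == 0);
  return 0;
}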
+ if (C->isZero()) + return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), + C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. + if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = + SE.getAddExpr(Remainder, + SE.getConstant(C->getValue()->getValue().srem( + FC->getValue()->getValue()))); + return true; + } + } + } + + // In a Mul, check if there is a constant operand which is a multiple + // of the given factor. + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { + if (TD) { + // With TargetData, the size is known. Check if there is a constant + // operand which is a multiple of the given factor. If so, we can + // factor it. + const SCEVConstant *FC = cast<SCEVConstant>(Factor); + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = + SE.getConstant(C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } else { + // Without TargetData, check if Factor can be factored out of any of the + // Mul's operands. If so, we can just remove it. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *SOp = M->getOperand(i); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); + if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + Remainder->isZero()) { + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[i] = SOp; + S = SE.getMulExpr(NewMulOps); + return true; + } + } + } + } + + // In an AddRec, check if both start and step are divisible. + if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { + const SCEV *Step = A->getStepRecurrence(SE); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); + if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) + return false; + if (!StepRem->isZero()) + return false; + const SCEV *Start = A->getStart(); + if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) + return false; + // FIXME: can use A->getNoWrapFlags(FlagNW) + S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); + return true; + } + + return false; +} + +/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs +/// is the number of SCEVAddRecExprs present, which are kept at the end of +/// the list. +/// +static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, + const Type *Ty, + ScalarEvolution &SE) { + unsigned NumAddRecs = 0; + for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) + ++NumAddRecs; + // Group Ops into non-addrecs and addrecs. + SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); + SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end()); + // Let ScalarEvolution sort and simplify the non-addrecs list. + const SCEV *Sum = NoAddRecs.empty() ? + SE.getConstant(Ty, 0) : + SE.getAddExpr(NoAddRecs); + // If it returned an add, use the operands. Otherwise it simplified + // the sum into a single value, so just use that. 
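For addrecs, FactorOutConstant above only succeeds when the step divides evenly and the start can be factored too; the effect is to pull a common scale out of the whole recurrence. A simplified worked sketch, restricted to the evenly-divisible case (toy types, not the real SCEV API):

#include <cassert>

struct ToyAddRec { long Start, Step; };          // stand-in for {Start,+,Step}

// Divide both start and step by Factor; fail if either is not a multiple.
static bool factorOut(ToyAddRec &A, long Factor) {
  if (A.Start % Factor != 0 || A.Step % Factor != 0)
    return false;
  A.Start /= Factor;
  A.Step /= Factor;
  return true;
}

int main() {
  ToyAddRec A = { 16, 24 };
  assert(factorOut(A, 8) && A.Start == 2 && A.Step == 3); // {16,+,24} -> {2,+,3}
  ToyAddRec B = { 16, 20 };
  assert(!factorOut(B, 8));                               // 20 is not a multiple of 8
  return 0;
}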
+ Ops.clear(); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) + Ops.append(Add->op_begin(), Add->op_end()); + else if (!Sum->isZero()) + Ops.push_back(Sum); + // Then append the addrecs. + Ops.append(AddRecs.begin(), AddRecs.end()); +} + +/// SplitAddRecs - Flatten a list of add operands, moving addrec start values +/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}. +/// This helps expose more opportunities for folding parts of the expressions +/// into GEP indices. +/// +static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, + const Type *Ty, + ScalarEvolution &SE) { + // Find the addrecs. + SmallVector<const SCEV *, 8> AddRecs; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { + const SCEV *Start = A->getStart(); + if (Start->isZero()) break; + const SCEV *Zero = SE.getConstant(Ty, 0); + AddRecs.push_back(SE.getAddRecExpr(Zero, + A->getStepRecurrence(SE), + A->getLoop(), + // FIXME: A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { + Ops[i] = Zero; + Ops.append(Add->op_begin(), Add->op_end()); + e += Add->getNumOperands(); + } else { + Ops[i] = Start; + } + } + if (!AddRecs.empty()) { + // Add the addrecs onto the end of the list. + Ops.append(AddRecs.begin(), AddRecs.end()); + // Resort the operand list, moving any constants to the front. + SimplifyAddOperands(Ops, Ty, SE); + } +} + +/// expandAddToGEP - Expand an addition expression with a pointer type into +/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps +/// BasicAliasAnalysis and other passes analyze the result. See the rules +/// for getelementptr vs. inttoptr in +/// http://llvm.org/docs/LangRef.html#pointeraliasing +/// for details. +/// +/// Design note: The correctness of using getelementptr here depends on +/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as +/// they may introduce pointer arithmetic which may not be safely converted +/// into getelementptr. +/// +/// Design note: It might seem desirable for this function to be more +/// loop-aware. If some of the indices are loop-invariant while others +/// aren't, it might seem desirable to emit multiple GEPs, keeping the +/// loop-invariant portions of the overall computation outside the loop. +/// However, there are a few reasons this is not done here. Hoisting simple +/// arithmetic is a low-level optimization that often isn't very +/// important until late in the optimization process. In fact, passes +/// like InstructionCombining will combine GEPs, even if it means +/// pushing loop-invariant computation down into loops, so even if the +/// GEPs were split here, the work would quickly be undone. The +/// LoopStrengthReduction pass, which is usually run quite late (and +/// after the last InstructionCombining pass), takes care of hoisting +/// loop-invariant portions of expressions, after considering what +/// can be folded using target addressing modes. +/// +Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, + const PointerType *PTy, + const Type *Ty, + Value *V) { + const Type *ElTy = PTy->getElementType(); + SmallVector<Value *, 4> GepIndices; + SmallVector<const SCEV *, 8> Ops(op_begin, op_end); + bool AnyNonZeroIndices = false; + + // Split AddRecs up into parts as either of the parts may be usable + // without the other. 
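SplitAddRecs above peels the start value off a recurrence so it can participate in GEP folding on its own: a recurrence with a non-zero start becomes that start plus a zero-based recurrence. A toy sketch of the rewrite (the real code also flattens adds nested inside the start):

#include <cassert>
#include <utility>

struct ToyAddRec { long Start, Step; };

// {Start,+,Step}  ->  Start  and  {0,+,Step}
static std::pair<long, ToyAddRec> splitAddRec(const ToyAddRec &A) {
  ToyAddRec ZeroBased = { 0, A.Step };
  return std::make_pair(A.Start, ZeroBased);
}

int main() {
  ToyAddRec A = { 8, 4 };
  std::pair<long, ToyAddRec> P = splitAddRec(A);
  assert(P.first == 8 && P.second.Start == 0 && P.second.Step == 4);
  return 0;
}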
+ SplitAddRecs(Ops, Ty, SE); + + // Descend down the pointer's type and attempt to convert the other + // operands into GEP indices, at each level. The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + for (;;) { + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector<const SCEV *, 8> ScaledOps; + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + if (!ElSize->isZero()) { + SmallVector<const SCEV *, 8> NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } + } + } + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). + Value *Scaled = ScaledOps.empty() ? + Constant::getNullValue(Ty) : + expandCodeFor(SE.getAddExpr(ScaledOps), Ty); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + if (SE.TD) { + // With TargetData, field offsets are known. See if a constant offset + // falls within any of the struct fields. + if (Ops.empty()) break; + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *SE.TD->getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); + AnyNonZeroIndices = true; + FoundFieldNo = true; + } + } + } else { + // Without TargetData, just check for an offsetof expression of the + // appropriate struct type. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue()); + Ops[i] = SE.getConstant(Ty, 0); + AnyNonZeroIndices = true; + FoundFieldNo = true; + break; + } + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). 
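With TargetData available, the struct handling above turns a constant byte offset into a field index plus a smaller offset within that field, which is what getElementContainingOffset provides. A toy version over a plain table of field offsets (hypothetical helper, not the StructLayout API):

#include <cassert>
#include <utility>
#include <vector>

// Return the index of the field containing Offset and the bytes left over
// within that field, assuming FieldOffsets is sorted and starts at 0.
static std::pair<unsigned, unsigned>
fieldForOffset(const std::vector<unsigned> &FieldOffsets, unsigned Offset) {
  unsigned Idx = 0;
  while (Idx + 1 < FieldOffsets.size() && FieldOffsets[Idx + 1] <= Offset)
    ++Idx;
  return std::make_pair(Idx, Offset - FieldOffsets[Idx]);
}

int main() {
  // struct { i32, i32, i64 } laid out at offsets 0, 4, 8.
  std::vector<unsigned> Offsets;
  Offsets.push_back(0); Offsets.push_back(4); Offsets.push_back(8);
  assert(fieldForOffset(Offsets, 12).first == 2);   // lands in the i64 field
  assert(fieldForOffset(Offsets, 12).second == 4);  // 4 bytes into that field
  return 0;
}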
+ if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); + } + } + + if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + ElTy = ATy->getElementType(); + else + break; + } + + // If none of the operands were convertible to proper GEP indices, cast + // the base to i8* and do an ugly getelementptr with that. It's still + // better than ptrtoint+arithmetic+inttoptr at least. + if (!AnyNonZeroIndices) { + // Cast the base to i8*. + V = InsertNoopCastOfTo(V, + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + // Expand the operands for a plain byte offset. + Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); + + // Fold a GEP with constant operands. + if (Constant *CLHS = dyn_cast<Constant>(V)) + if (Constant *CRHS = dyn_cast<Constant>(Idx)) + return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + + // Do a quick scan to see if we have this GEP nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; + if (IP->getOpcode() == Instruction::GetElementPtr && + IP->getOperand(0) == V && IP->getOperand(1) == Idx) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Emit a GEP. + Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return GEP; + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. 
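The "uglygep" fallback above is the IR counterpart of raw byte addressing: when no structured indices can be recovered, the base is reinterpreted as i8* and one flat byte offset is added. The sketch below shows the two equivalent ways of forming the same address in C++ terms (hypothetical struct, standard layout assumed):

#include <cassert>
#include <cstddef>

struct S { int A; long B; };

static long *typedAddress(S *Base, long Idx) {
  return &Base[Idx].B;                            // the "pretty" GEP shape
}

static long *byteAddress(S *Base, long Idx) {
  char *Raw = reinterpret_cast<char *>(Base);     // cast the base to i8*
  return reinterpret_cast<long *>(Raw + Idx * sizeof(S) + offsetof(S, B));
}

int main() {
  S Arr[4];
  assert(typedAddress(Arr, 2) == byteAddress(Arr, 2));
  return 0;
}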
+ Value *Casted = V; + if (V->getType() != PTy) + Casted = InsertNoopCastOfTo(Casted, PTy); + Value *GEP = Builder.CreateGEP(Casted, + GepIndices.begin(), + GepIndices.end(), + "scevgep"); + Ops.push_back(SE.getUnknown(GEP)); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return expand(SE.getAddExpr(Ops)); +} + +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +static bool isNonConstantNegative(const SCEV *F) { + const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} + +/// getRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. +const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { + // Test whether we've already computed the most relevant loop for this SCEV. + std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = + RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0))); + if (!Pair.second) + return Pair.first->second; + + if (isa<SCEVConstant>(S)) + // A constant has no relevant loops. + return 0; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) + return Pair.first->second = SE.LI->getLoopFor(I->getParent()); + // A non-instruction has no relevant loops. + return 0; + } + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { + const Loop *L = 0; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + L = AR->getLoop(); + for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); + I != E; ++I) + L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); + return RelevantLoops[N] = L; + } + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { + const Loop *Result = getRelevantLoop(C->getOperand()); + return RelevantLoops[C] = Result; + } + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + const Loop *Result = + PickMostRelevantLoop(getRelevantLoop(D->getLHS()), + getRelevantLoop(D->getRHS()), + *SE.DT); + return RelevantLoops[D] = Result; + } + llvm_unreachable("Unexpected SCEV type!"); + return 0; +} + +namespace { + +/// LoopCompare - Compare loops by PickMostRelevantLoop. +class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair<const Loop *, const SCEV *> LHS, + std::pair<const Loop *, const SCEV *> RHS) const { + // Keep pointer operands sorted at the end. 
+ if (LHS.second->getType()->isPointerTy() != + RHS.second->getType()->isPointerTy()) + return LHS.second->getType()->isPointerTy(); + + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (isNonConstantNegative(LHS.second)) { + if (!isNonConstantNegative(RHS.second)) + return false; + } else if (isNonConstantNegative(RHS.second)) + return true; + + // Otherwise they are equivalent according to this comparison. + return false; + } +}; + +} + +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector<const SCEV *, 4> NewOps; + for (; I != E && I->first == CurLoop; ++I) { + // If the operand is SCEVUnknown and not instructions, peek through + // it, to enable more of it to be folded into the GEP. + const SCEV *X = I->second; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X)) + if (!isa<Instruction>(U->getValue())) + X = SE.getSCEV(U->getValue()); + NewOps.push_back(X); + } + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { + // The running sum is an integer, and there's a pointer at this level. + // Try to form a getelementptr. If the running sum is instructions, + // use a SCEVUnknown to avoid re-analyzing them. + SmallVector<const SCEV *, 4> NewOps; + NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) : + SE.getSCEV(Sum)); + for (++I; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); + } else if (isNonConstantNegative(Op)) { + // Instead of doing a negate and add, just do a subtract. + Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W); + ++I; + } else { + // A simple add. 
+ Value *W = expandCodeFor(Op, Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W); + ++I; + } + } + + return Sum; +} + +Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const SCEV *Op = I->second; + if (!Prod) { + // This is the first operand. Just expand it. + Prod = expand(Op); + ++I; + } else if (Op->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; + } else { + // A simple mul. + Value *W = expandCodeFor(Op, Ty); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } + } + + return Prod; +} + +Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + Value *LHS = expandCodeFor(S->getLHS(), Ty); + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { + const APInt &RHS = SC->getValue()->getValue(); + if (RHS.isPowerOf2()) + return InsertBinop(Instruction::LShr, LHS, + ConstantInt::get(Ty, RHS.logBase2())); + } + + Value *RHS = expandCodeFor(S->getRHS(), Ty); + return InsertBinop(Instruction::UDiv, LHS, RHS); +} + +/// Move parts of Base into Rest to leave Base with the minimal +/// expression that provides a pointer operand suitable for a +/// GEP expansion. +static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, + ScalarEvolution &SE) { + while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { + Base = A->getStart(); + Rest = SE.getAddExpr(Rest, + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), + A->getStepRecurrence(SE), + A->getLoop(), + // FIXME: A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); + } + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { + Base = A->getOperand(A->getNumOperands()-1); + SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); + NewAddOps.back() = Rest; + Rest = SE.getAddExpr(NewAddOps); + ExposePointerBase(Base, Rest, SE); + } +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. 
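visitUDivExpr above strength-reduces a divide by a constant power of two into a logical shift right. A standalone sketch of the same check and rewrite on plain 64-bit values:

#include <cassert>
#include <cstdint>

static bool isPowerOf2(uint64_t X) { return X != 0 && (X & (X - 1)) == 0; }

static unsigned log2Exact(uint64_t X) {           // X must be a power of two
  unsigned N = 0;
  while (X > 1) { X >>= 1; ++N; }
  return N;
}

static uint64_t loweredUDiv(uint64_t LHS, uint64_t RHS) {
  if (isPowerOf2(RHS))
    return LHS >> log2Exact(RHS);                 // udiv X, 2^k -> lshr X, k
  return LHS / RHS;                               // general case stays a udiv
}

int main() {
  assert(loweredUDiv(40, 8) == 5);
  assert(loweredUDiv(40, 3) == 13);
  return 0;
}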
+PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + const Type *ExpandTy, + const Type *IntTy) { + assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); + + // Reuse a previously-inserted PHI, if present. + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) { + IncV = 0; + break; + } + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) { + IncV = 0; + break; + } + } + if (!IncV) + break; + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Expand code for the start value. + Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getHeader()->begin()); + + // StartV must be hoisted into L's preheader to dominate the new phi. + assert(!isa<Instruction>(StartV) || + SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), + L->getHeader())); + + // Expand code for the step value. Insert instructions right before the + // terminator corresponding to the back-edge. Do this before creating the PHI + // so that PHI reuse code doesn't see an incomplete PHI. If the stride is + // negative, insert a sub instead of an add for the increment (unless it's a + // constant, because subtracts of constants are canonicalized to adds). 
+ const SCEV *Step = Normalized->getStepRecurrence(SE); + bool isPointer = ExpandTy->isPointerTy(); + bool isNegative = !isPointer && isNonConstantNegative(Step); + if (isNegative) + Step = SE.getNegativeSCEV(Step); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + + // Create the PHI. + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), + Twine(IVName) + ".iv"); + rememberInstruction(PN); + + // Create the step instructions and populate the PHI. + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. + if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. If IVIncInsertLoop is + // non-null and equal to the addrec's loop, insert the instructions + // at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ? + IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos); + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (isPointer) { + const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp"); + rememberInstruction(IncV); + } + } else { + IncV = isNegative ? + Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + PN->addIncoming(IncV, Pred); + } + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + + return PN; +} + +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + const Type *STy = S->getType(); + const Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. + const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = 0; + if (!SE.properlyDominates(Start, L->getHeader())) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = cast<SCEVAddRecExpr>( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + // FIXME: Normalized->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); + } + + // Strip off any non-loop-dominating component from the addrec step. 
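The PHI and increment emitted above correspond, in scalar terms, to a loop-carried variable seeded in the preheader and stepped in the latch: uses inside the loop see the pre-increment value, while a user reached through the latch sees the stepped value. A C++ analogue of the {Start,+,Step} recurrence:

#include <cassert>

static long lastPostIncValue(long Start, long Step, unsigned TripCount) {
  long IV = Start;                 // PHI: incoming value from the preheader
  for (unsigned i = 0; i != TripCount; ++i) {
    // Uses inside the loop body read IV here: the pre-increment value.
    IV += Step;                    // latch: %iv.next = add (or sub) %iv, Step
  }
  return IV;                       // the post-increment value after the loop
}

int main() {
  assert(lastPostIncValue(3, 5, 4) == 23);   // {3,+,5} stepped four times
  return 0;
}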
+ const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = 0; + if (!SE.dominates(Step, L->getHeader())) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + Normalized = + cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step, + Normalized->getLoop(), + // FIXME: Normalized + // ->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + const Type *ExpandTy = PostLoopScale ? IntTy : STy; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + + // Accommodate post-inc mode, if necessary. + Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeFor(PostLoopScale, IntTy)); + rememberInstruction(Result); + } + + // Re-apply any non-loop-dominating offset. + if (PostLoopOffset) { + if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + const SCEV *const OffsetArray[1] = { PostLoopOffset }; + Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd(Result, + expandCodeFor(PostLoopOffset, IntTy)); + rememberInstruction(Result); + } + } + + return Result; +} + +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + if (!CanonicalMode) return expandAddRecExprLiterally(S); + + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = 0; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow. + if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > + SE.getTypeSizeInBits(Ty)) { + SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + // FIXME: S->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BasicBlock::iterator NewInsertPt = + llvm::next(BasicBlock::iterator(cast<Instruction>(V))); + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt)) + ++NewInsertPt; + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + NewInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; + } + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); + NewOps[0] = SE.getConstant(Ty, 0); + // FIXME: can use S->getNoWrapFlags() + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. 
+ const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + } + } + + // Just do a normal add. Pre-expand the operands to suppress folding. + return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())), + SE.getUnknown(expand(Rest)))); + } + + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { + // Create and insert the PHI node for the induction variable in the + // specified loop. + BasicBlock *Header = L->getHeader(); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + Header->begin()); + rememberInstruction(CanonicalIV); + + Constant *One = ConstantInt::get(Ty, 1); + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); + } else { + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); + } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + + // {0,+,F} --> {0,+,1} * F + + // If this is a simple linear addrec, emit it now as a special case. + if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(CanonicalIV), + SE.getNoopOrAnyExtend(S->getOperand(1), + CanonicalIV->getType())), + Ty)); + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form. This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. + + // Promote S up to the canonical IV type, if the cast is foldable. + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); + if (isa<SCEVAddRecExpr>(Ext)) + NewS = Ext; + + const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); + //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + + // Truncate the result down to the original type, if needed. 
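The affine special cases above come down to a closed form: with the canonical induction variable i = {0,+,1}, the recurrence {0,+,F} is simply i * F, and a general affine {A,+,B} evaluated at iteration i is A + B * i, which is what evaluateAtIteration produces for this case. A tiny sketch:

#include <cassert>

static long evalAffineAddRec(long A, long B, long I) {
  return A + B * I;                // value of {A,+,B} on iteration I
}

int main() {
  // {3,+,5} takes the values 3, 8, 13, ... on successive iterations.
  assert(evalAffineAddRec(3, 5, 0) == 3);
  assert(evalAffineAddRec(3, 5, 2) == 13);
  return 0;
}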
+ const SCEV *T = SE.getTruncateOrNoop(V, Ty); + return expand(T); +} + +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateTrunc(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateZExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateSExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, + Instruction *I) { + BasicBlock::iterator IP = I; + while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) + ++IP; + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { + // Expand the code for this SCEV. + Value *V = expand(SH); + if (Ty) { + assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && + "non-trivial casts should be done with the SCEVs directly!"); + V = InsertNoopCastOfTo(V, Ty); + } + return V; +} + +Value *SCEVExpander::expand(const SCEV *S) { + // Compute an insertion point for this SCEV object. 
Hoist the instructions + // as far out in the loop nest as possible. + Instruction *InsertPt = Builder.GetInsertPoint(); + for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + L = L->getParentLoop()) + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = L->getHeader()->getFirstNonPHI(); + while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) + InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + break; + } + + // Check to see if we already expanded this here. + std::map<std::pair<const SCEV *, Instruction *>, + AssertingVH<Value> >::iterator I = + InsertedExpressions.find(std::make_pair(S, InsertPt)); + if (I != InsertedExpressions.end()) + return I->second; + + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + + // Expand the expression into instructions. + Value *V = visit(S); + + // Remember the expanded value for this SCEV at this location. + if (PostIncLoops.empty()) + InsertedExpressions[std::make_pair(S, InsertPt)] = V; + + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; +} + +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else + InsertedValues.insert(I); + + // If we just claimed an existing instruction and that instruction had + // been the insert point, adjust the insert point forward so that + // subsequently inserted code will be dominated. + if (Builder.GetInsertPoint() == I) { + BasicBlock::iterator It = cast<Instruction>(I); + do { ++It; } while (isInsertedInstruction(It) || + isa<DbgInfoIntrinsic>(It)); + Builder.SetInsertPoint(Builder.GetInsertBlock(), It); + } +} + +void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { + // If we acquired more instructions since the old insert point was saved, + // advance past them. + while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I; + + Builder.SetInsertPoint(BB, I); +} + +/// getOrInsertCanonicalInductionVariable - This method returns the +/// canonical induction variable of the specified type for the specified +/// loop (inserting one if there is none). A canonical induction variable +/// starts at zero and steps by one on each iteration. +PHINode * +SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, + const Type *Ty) { + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}<L>. + // Conservatively use FlagAnyWrap for now. + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); + + // Emit code for it. 
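For context, a hypothetical client-side sketch (not part of this commit) of how a transform pass might use the canonical-IV helper introduced above: given a loop already analyzed by ScalarEvolution and an existing SCEVExpander, request the {0,+,1} induction variable in a chosen integer type.

#include "llvm/Instructions.h"
#include "llvm/Type.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
using namespace llvm;

// Exp is assumed to be an already-constructed SCEVExpander tied to SE.
static PHINode *emitCanonicalIV(SCEVExpander &Exp, ScalarEvolution &SE,
                                const Loop *L) {
  const Type *I64 = Type::getInt64Ty(SE.getContext());
  // The returned PHI starts at zero and steps by one each iteration; an
  // existing suitable PHI is reused instead of inserting a new one.
  return Exp.getOrInsertCanonicalInductionVariable(L, I64);
}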
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return V; +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 0000000..60e630a --- /dev/null +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,184 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN || !Operand) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the + // preincremented value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + + // Okay, all uses of Operand by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. 
+ return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. + SmallVector<const SCEV *, 8> Operands; + const Loop *L = AR->getLoop(); + // The addrec conceptually uses its operands at loop entry. + Instruction *LUser = L->getHeader()->begin(); + // Transform each operand. + for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); + Operands.push_back(N); + } + // Conservatively use AnyWrap until/unless we need FlagNW. + const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + Loops.insert(L); + } +#if 0 + // This assert is conceptually correct, but ScalarEvolution currently + // sometimes fails to canonicalize two equal SCEVs to exactly the same + // form. It's possibly a pessimization when this happens, but it isn't a + // correctness problem, so disable this assert for now. + assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Normalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); + } +#if 0 + // See the comment on the assert above. + assert(S == TransformForPostIncUse(Denormalize, Result, + User, OperandValToReplace, + Loops, SE, DT) && + "SCEV normalization is not invertible!"); +#endif + break; + case Denormalize: + if (Loops.count(L)) + Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE); + break; + } + return Result; + } + + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + // Transform each operand. + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + // If any operand actually changed, return a transformed result. 
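On an affine recurrence, the Normalize/Denormalize pair above acts by subtracting or adding one step: normalizing a post-increment use of {Start,+,Step} yields {Start-Step,+,Step}, and denormalizing that gives the original back, which is why the two transforms are conceptually inverses. A toy sketch:

#include <cassert>

struct ToyAddRec { long Start, Step; };

static ToyAddRec normalizePostInc(const ToyAddRec &AR) {
  ToyAddRec R = { AR.Start - AR.Step, AR.Step };   // {0,+,1} -> {-1,+,1}
  return R;
}

static ToyAddRec denormalizePostInc(const ToyAddRec &AR) {
  ToyAddRec R = { AR.Start + AR.Step, AR.Step };   // {-1,+,1} -> {0,+,1}
  return R;
}

int main() {
  ToyAddRec IV = { 0, 1 };
  ToyAddRec N = normalizePostInc(IV);
  ToyAddRec D = denormalizePostInc(N);
  assert(N.Start == -1 && D.Start == 0 && D.Step == 1);
  return 0;
}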
+ if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp new file mode 100644 index 0000000..d8c207b --- /dev/null +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -0,0 +1,347 @@ +//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an abstract sparse conditional propagation algorithm, +// modeled after SCCP, but with a customizable lattice function. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparseprop" +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +AbstractLatticeFunction::~AbstractLatticeFunction() {} + +/// PrintValue - Render the specified lattice value to the specified stream. +void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +/// getOrInitValueState - Return the LatticeVal object that corresponds to the +/// value, initializing the value's state if it hasn't been entered into the +/// map yet. This function is necessary because not all values should start +/// out in the underdefined state... Arguments should be overdefined, and +/// constants should be marked as constants. 
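The initialization policy described above maps onto a small three-point lattice: constants start as themselves, arguments and other non-instructions start overdefined, and instructions start at the bottom so the solver can refine them later. A toy sketch with hypothetical names, not the real AbstractLatticeFunction API:

enum ToyKind { Undefined, ConstantVal, Overdefined };
struct ToyLatticeVal { ToyKind Kind; long Value; };

enum ToyValueClass { IsConstant, IsArgument, IsInstruction };

static ToyLatticeVal initialState(ToyValueClass Class, long ConstVal) {
  ToyLatticeVal LV = { Undefined, 0 };
  switch (Class) {
  case IsConstant:    LV.Kind = ConstantVal; LV.Value = ConstVal; break;
  case IsArgument:    LV.Kind = Overdefined;                      break;
  case IsInstruction: /* stays Undefined until visited */         break;
  }
  return LV;
}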
+/// +SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V); + if (I != ValueState.end()) return I->second; // Common case, in the map + + LatticeVal LV; + if (LatticeFunc->IsUntrackedValue(V)) + return LatticeFunc->getUntrackedVal(); + else if (Constant *C = dyn_cast<Constant>(V)) + LV = LatticeFunc->ComputeConstant(C); + else if (Argument *A = dyn_cast<Argument>(V)) + LV = LatticeFunc->ComputeArgument(A); + else if (!isa<Instruction>(V)) + // All other non-instructions are overdefined. + LV = LatticeFunc->getOverdefinedVal(); + else + // All instructions are underdefined by default. + LV = LatticeFunc->getUndefVal(); + + // If this value is untracked, don't add it to the map. + if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[V] = LV; +} + +/// UpdateState - When the state for some instruction is potentially updated, +/// this function notices and adds I to the worklist if needed. +void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { + DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst); + if (I != ValueState.end() && I->second == V) + return; // No change. + + // An update. Visit uses of I. + ValueState[&Inst] = V; + InstWorkList.push_back(&Inst); +} + +/// MarkBlockExecutable - This method can be used by clients to mark all of +/// the blocks that are known to be intrinsically live in the processed unit. +void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBExecutable.insert(BB); // Basic block is executable! + BBWorkList.push_back(BB); // Add the block to the work list! +} + +/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB +/// work list if it is not already executable... +void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I) + visitPHINode(*cast<PHINode>(I)); + + } else { + MarkBlockExecutable(Dest); + } +} + + +/// getFeasibleSuccessors - Return a vector of booleans to indicate which +/// successors are reachable from a given terminator instruction. +void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, + SmallVectorImpl<bool> &Succs, + bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) return; + + if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = getOrInitValueState(BI->getCondition()); + else + BCValue = getLatticeState(BI->getCondition()); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. 
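As an aside, the conditional-branch handling here boils down to a small decision table keyed on the condition's lattice value. A standalone illustrative sketch (the Lattice enum and feasibleSuccs helper are hypothetical, not part of SparseSolver):

#include <array>
#include <cstdio>

// Hypothetical lattice for a branch condition (untracked behaves like
// overdefined in the real solver).
enum class Lattice { Undef, ConstTrue, ConstFalse, Overdefined };

// Succs[0] is the true edge, Succs[1] the false edge, as for a BranchInst.
std::array<bool, 2> feasibleSuccs(Lattice Cond) {
  switch (Cond) {
  case Lattice::Undef:       return {false, false}; // neither feasible yet
  case Lattice::ConstTrue:   return {true,  false}; // only the taken edge
  case Lattice::ConstFalse:  return {false, true};
  case Lattice::Overdefined: return {true,  true};  // could go either way
  }
  return {true, true};
}

int main() {
  std::array<bool, 2> S = feasibleSuccs(Lattice::ConstFalse);
  std::printf("true-edge=%d false-edge=%d\n", (int)S[0], (int)S[1]);
  return 0;
}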
+ if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); + if (C == 0 || !isa<ConstantInt>(C)) { + // Non-constant values can go either way. + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C->isNullValue()] = true; + return; + } + + if (isa<InvokeInst>(TI)) { + // Invoke instructions successors are always executable. + // TODO: Could ask the lattice function if the value can throw. + Succs[0] = Succs[1] = true; + return; + } + + if (isa<IndirectBrInst>(TI)) { + Succs.assign(Succs.size(), true); + return; + } + + SwitchInst &SI = cast<SwitchInst>(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getOrInitValueState(SI.getCondition()); + else + SCValue = getLatticeState(SI.getCondition()); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); + if (C == 0 || !isa<ConstantInt>(C)) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true; +} + + +/// isEdgeFeasible - Return true if the control flow edge from the 'From' +/// basic block to the 'To' basic block is currently feasible... +bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, + bool AggressiveUndef) { + SmallVector<bool, 16> SuccFeasible; + TerminatorInst *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +void SparseSolver::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. + if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(PN, IV); + return; + } + + LatticeVal PNIV = getOrInitValueState(&PN); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common) just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(PN, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. 
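As an aside before the merge loop that follows, a standalone sketch of the merge rule it applies over the usual three-level lattice (the LV enum and merge helper are hypothetical; the real lattice and MergeValues come from the client's AbstractLatticeFunction):

#include <cstdio>

// Hypothetical three-level lattice with two distinct constant values.
enum class LV { Undef, ConstantA, ConstantB, Overdefined };

LV merge(LV A, LV B) {
  if (A == LV::Undef) return B;    // undef is the identity element
  if (B == LV::Undef) return A;
  if (A == B) return A;            // agreeing values stay put
  return LV::Overdefined;          // any disagreement goes to the top
}

int main() {
  // Only feasible incoming edges participate; an edge not yet known to be
  // executable contributes nothing, as in the loop that follows.
  LV PN = LV::Undef;
  PN = merge(PN, LV::ConstantA);   // first feasible incoming value
  PN = merge(PN, LV::ConstantA);   // an agreeing value changes nothing
  PN = merge(PN, LV::ConstantB);   // a conflicting value: overdefined
  std::printf("overdefined: %d\n", PN == LV::Overdefined);
  return 0;
}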
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i)); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the compute value, which is the merge of the inputs. + UpdateState(PN, PNIV); +} + + +void SparseSolver::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic, they are never passed into the + // transfer functions. + if (PHINode *PN = dyn_cast<PHINode>(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(I, IV); + + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I)) + visitTerminatorInst(*TI); +} + +void SparseSolver::Solve(Function &F) { + MarkBlockExecutable(&F.getEntryBlock()); + + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !InstWorkList.empty()) { + // Process the instruction work list. + while (!InstWorkList.empty()) { + Instruction *I = InstWorkList.back(); + InstWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n"); + + // "I" got into the work list because it made a transition. See if any + // users are both live and in need of updating. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *U = cast<Instruction>(*UI); + if (BBExecutable.count(U->getParent())) // Inst is executable? + visitInst(*U); + } + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); + + // Notify all instructions in this basic block that they are newly + // executable. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + visitInst(*I); + } + } +} + +void SparseSolver::Print(Function &F, raw_ostream &OS) const { + OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (!BBExecutable.count(BB)) + OS << "INFEASIBLE: "; + OS << "\t"; + if (BB->hasName()) + OS << BB->getNameStr() << ":\n"; + else + OS << "; anon bb\n"; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + LatticeFunc->PrintValue(getLatticeState(I), OS); + OS << *I << "\n"; + } + + OS << "\n"; + } +} + diff --git a/contrib/llvm/lib/Analysis/Trace.cpp b/contrib/llvm/lib/Analysis/Trace.cpp new file mode 100644 index 0000000..68a39cd --- /dev/null +++ b/contrib/llvm/lib/Analysis/Trace.cpp @@ -0,0 +1,51 @@ +//===- Trace.cpp - Implementation of Trace class --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents a single trace of LLVM basic blocks. A trace is a +// single entry, multiple exit, region of code that is often hot. 
Trace-based +// optimizations treat traces almost like they are a large, strange, basic +// block: because the trace path is assumed to be hot, optimizations for the +// fall-through path are made at the expense of the non-fall-through paths. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Trace.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +Function *Trace::getFunction() const { + return getEntryBasicBlock()->getParent(); +} + +Module *Trace::getModule() const { + return getFunction()->getParent(); +} + +/// print - Write trace to output stream. +/// +void Trace::print(raw_ostream &O) const { + Function *F = getFunction(); + O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; + for (const_iterator i = begin(), e = end(); i != e; ++i) { + O << "; "; + WriteAsOperand(O, *i, true, getModule()); + O << "\n"; + } + O << "; Trace parent function: \n" << *F; +} + +/// dump - Debugger convenience method; writes trace to standard error +/// output stream. +/// +void Trace::dump() const { + print(dbgs()); +} diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp new file mode 100644 index 0000000..0faf1398 --- /dev/null +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -0,0 +1,300 @@ +//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TypeBasedAliasAnalysis pass, which implements +// metadata-based TBAA. +// +// In LLVM IR, memory does not have types, so LLVM's own type system is not +// suitable for doing TBAA. Instead, metadata is added to the IR to describe +// a type system of a higher level language. This can be used to implement +// typical C/C++ TBAA, but it can also be used to implement custom alias +// analysis behavior for other languages. +// +// The current metadata format is very simple. TBAA MDNodes have up to +// three fields, e.g.: +// !0 = metadata !{ metadata !"an example type tree" } +// !1 = metadata !{ metadata !"int", metadata !0 } +// !2 = metadata !{ metadata !"float", metadata !0 } +// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 } +// +// The first field is an identity field. It can be any value, usually +// an MDString, which uniquely identifies the type. The most important +// name in the tree is the name of the root node. Two trees with +// different root node names are entirely disjoint, even if they +// have leaves with common names. +// +// The second field identifies the type's parent node in the tree, or +// is null or omitted for a root node. A type is considered to alias +// all of its descendants and all of its ancestors in the tree. Also, +// a type is considered to alias all types in other trees, so that +// bitcode produced from multiple front-ends is handled conservatively. +// +// If the third field is present, it's an integer which if equal to 1 +// indicates that the type is "constant" (meaning pointsToConstantMemory +// should return true; see +// http://llvm.org/docs/AliasAnalysis.html#OtherItfs). 
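As an aside, the ancestor-walk rule these trees imply can be sketched directly on the example nodes !0..!3 above; the Node struct and mayAlias helper below are hypothetical stand-ins for the Aliases() routine defined later in this file:

#include <cassert>

// Hypothetical toy model of a TBAA type-tree node (not LLVM API).
struct Node { const char *Name; const Node *Parent; };

static const Node Root   = {"an example type tree", nullptr};  // !0
static const Node Int    = {"int",         &Root};             // !1
static const Node Float  = {"float",       &Root};             // !2
static const Node CFloat = {"const float", &Float};            // !3

static const Node *rootOf(const Node *N) {
  while (N->Parent) N = N->Parent;
  return N;
}

static bool isAncestorOrSelf(const Node *A, const Node *B) {
  for (const Node *N = B; N; N = N->Parent)
    if (N == A) return true;
  return false;
}

// A type aliases its ancestors and descendants; nodes in different trees are
// handled conservatively (assume they may alias).
static bool mayAlias(const Node *A, const Node *B) {
  if (rootOf(A) != rootOf(B)) return true;
  return isAncestorOrSelf(A, B) || isAncestorOrSelf(B, A);
}

int main() {
  assert(!mayAlias(&Int, &Float));   // "int" vs "float": provably no alias
  assert(mayAlias(&Float, &CFloat)); // "float" vs "const float": may alias
  assert(mayAlias(&Int, &Int));
  return 0;
}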
+// +// TODO: The current metadata format doesn't support struct +// fields. For example: +// struct X { +// double d; +// int i; +// }; +// void foo(struct X *x, struct X *y, double *p) { +// *x = *y; +// *p = 0.0; +// } +// Struct X has a double member, so the store to *x can alias the store to *p. +// Currently it's not possible to precisely describe all the things struct X +// aliases, so struct assignments must use conservative TBAA nodes. There's +// no scheme for attaching metadata to @llvm.memcpy yet either. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +// A handy option for disabling TBAA functionality. The same effect can also be +// achieved by stripping the !tbaa tags from IR, but this option is sometimes +// more convenient. +static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); + +namespace { + /// TBAANode - This is a simple wrapper around an MDNode which provides a + /// higher-level interface by hiding the details of how alias analysis + /// information is encoded in its operands. + class TBAANode { + const MDNode *Node; + + public: + TBAANode() : Node(0) {} + explicit TBAANode(const MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } + + /// getParent - Get this TBAANode's Alias tree parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } + + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); + if (!CI) + return false; + return CI->getValue()[0]; + } + }; +} + +namespace { + /// TypeBasedAliasAnalysis - This is a simple alias analysis + /// implementation that uses TypeBased to answer queries. + class TypeBasedAliasAnalysis : public ImmutablePass, + public AliasAnalysis { + public: + static char ID; // Class identification, replacement for typeinfo + TypeBasedAliasAnalysis() : ImmutablePass(ID) { + initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. 
+ virtual void *getAdjustedAnalysisPointer(const void *PI) { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + bool Aliases(const MDNode *A, const MDNode *B) const; + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual AliasResult alias(const Location &LocA, const Location &LocB); + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + virtual ModRefBehavior getModRefBehavior(const Function *F); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2); + }; +} // End of anonymous namespace + +// Register this pass... +char TypeBasedAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", + "Type-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { + return new TypeBasedAliasAnalysis(); +} + +void +TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool +TypeBasedAliasAnalysis::Aliases(const MDNode *A, + const MDNode *B) const { + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A); ; ) { + if (T.getNode() == B) + // B is an ancestor of A. + return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B); ; ) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { + if (!EnableTBAA) + return AliasAnalysis::alias(LocA, LocB); + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + const MDNode *AM = LocA.TBAATag; + if (!AM) return AliasAnalysis::alias(LocA, LocB); + const MDNode *BM = LocB.TBAATag; + if (!BM) return AliasAnalysis::alias(LocA, LocB); + + // If they may alias, chain to the next AliasAnalysis. + if (Aliases(AM, BM)) + return AliasAnalysis::alias(LocA, LocB); + + // Otherwise return a definitive result. + return NoAlias; +} + +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableTBAA) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + const MDNode *M = Loc.TBAATag; + if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. 
+ if (TBAANode(M).TypeIsImmutable()) + return true; + + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (!EnableTBAA) + return AliasAnalysis::getModRefBehavior(CS); + + ModRefBehavior Min = UnknownModRefBehavior; + + // If this is an "immutable" type, we can assume the call doesn't write + // to memory. + if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (TBAANode(M).TypeIsImmutable()) + Min = OnlyReadsMemory; + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { + // Functions don't have metadata. Just chain to the next implementation. + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS, Loc); + + if (const MDNode *L = Loc.TBAATag) + if (const MDNode *M = + CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(L, M)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableTBAA) + return AliasAnalysis::getModRefInfo(CS1, CS2); + + if (const MDNode *M1 = + CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (const MDNode *M2 = + CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (!Aliases(M1, M2)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS1, CS2); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp new file mode 100644 index 0000000..455c910 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -0,0 +1,1798 @@ +//===- ValueTracking.cpp - Walk computations to compute properties --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help analyze properties that chains of +// computations have. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/PatternMatch.h" +#include "llvm/ADT/SmallPtrSet.h" +#include <cstring> +using namespace llvm; +using namespace llvm::PatternMatch; + +const unsigned MaxDepth = 6; + +/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if +/// unknown returns 0). For vector types, returns the element type's bitwidth. +static unsigned getBitWidth(const Type *Ty, const TargetData *TD) { + if (unsigned BitWidth = Ty->getScalarSizeInBits()) + return BitWidth; + assert(isa<PointerType>(Ty) && "Expected a pointer type!"); + return TD ? 
TD->getPointerSizeInBits() : 0; +} + +/// ComputeMaskedBits - Determine which of the bits specified in Mask are +/// known to be either zero or one and return them in the KnownZero/KnownOne +/// bit sets. This code only analyzes bits in Mask, in order to short-circuit +/// processing. +/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that +/// we cannot optimize based on the assumption that it is zero without changing +/// it to be an explicit zero. If we don't change it to zero, other code could +/// optimized based on the contradictory assumption that it is non-zero. +/// Because instcombine aggressively folds operations with undef args anyway, +/// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + const TargetData *TD, unsigned Depth) { + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + unsigned BitWidth = Mask.getBitWidth(); + assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) + && "Not integer or pointer type!"); + assert((!TD || + TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && + (!V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarSizeInBits() == BitWidth) && + KnownZero.getBitWidth() == BitWidth && + KnownOne.getBitWidth() == BitWidth && + "V, Mask, KnownOne and KnownZero should have same BitWidth"); + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + // We know all of the bits for a constant! + KnownOne = CI->getValue() & Mask; + KnownZero = ~KnownOne & Mask; + return; + } + // Null and aggregate-zero are all-zeros. + if (isa<ConstantPointerNull>(V) || + isa<ConstantAggregateZero>(V)) { + KnownOne.clearAllBits(); + KnownZero = Mask; + return; + } + // Handle a constant vector by taking the intersection of the known bits of + // each element. + if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + KnownZero.setAllBits(); KnownOne.setAllBits(); + for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2, + TD, Depth); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + } + return; + } + // The address of an aligned GlobalValue has trailing zeros. + if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + unsigned Align = GV->getAlignment(); + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { + const Type *ObjectType = GV->getType()->getElementType(); + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GV->isDeclaration() && !GV->mayBeOverridden()) + Align = TD->getPrefTypeAlignment(ObjectType); + else + Align = TD->getABITypeAlignment(ObjectType); + } + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + else + KnownZero.clearAllBits(); + KnownOne.clearAllBits(); + return; + } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. 
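As an aside, the KnownZero/KnownOne bookkeeping this function performs can be modeled on 8-bit values in a few lines; the Known struct and helpers below are hypothetical, mirroring the And and Shl rules handled further down:

#include <cassert>
#include <cstdint>

// Hypothetical toy model (not LLVM API): a set bit means "this bit is known".
struct Known { uint8_t Zero; uint8_t One; };

Known fromConst(uint8_t C) { return {uint8_t(~C), C}; }

Known knownAnd(Known A, Known B) {
  // Known 1 only where both sides are known 1; known 0 where either is.
  return {uint8_t(A.Zero | B.Zero), uint8_t(A.One & B.One)};
}

Known knownShl(Known A, unsigned Amt) {
  // Shifted-in low bits are known zero.
  return {uint8_t((A.Zero << Amt) | ((1u << Amt) - 1)), uint8_t(A.One << Amt)};
}

int main() {
  Known X = {0, 0};                         // nothing known about x
  Known M = knownAnd(X, fromConst(0xF0));   // x & 0xF0
  assert(M.Zero == 0x0F && M.One == 0x00);  // low four bits known zero
  Known S = knownShl(M, 1);                 // (x & 0xF0) << 1
  assert(S.Zero == 0x1F);                   // five trailing zeros now known
  return 0;
}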
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + } else { + ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, + TD, Depth+1); + } + return; + } + + if (Argument *A = dyn_cast<Argument>(V)) { + // Get alignment information off byval arguments if specified in the IR. + if (A->hasByValAttr()) + if (unsigned Align = A->getParamAlignment()) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + return; + } + + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + + if (Depth == MaxDepth || Mask == 0) + return; // Limit search depth. + + Operator *I = dyn_cast<Operator>(V); + if (!I) return; + + APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + switch (I->getOpcode()) { + default: break; + case Instruction::And: { + // If either the LHS or the RHS are Zero, the result is zero. + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + APInt Mask2(Mask & ~KnownZero); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + return; + } + case Instruction::Or: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + APInt Mask2(Mask & ~KnownOne); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + return; + } + case Instruction::Xor: { + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + return; + } + case Instruction::Mul: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. 
+ KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case Instruction::UDiv: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + return; // Can't work with floating point. + case Instruction::PtrToInt: + case Instruction::IntToPtr: + // We can't handle these if we don't know the pointer size. + if (!TD) return; + // FALL THROUGH and handle them the same as zext/trunc. + case Instruction::ZExt: + case Instruction::Trunc: { + const Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; + // Note that we handle pointer operands here because of inttoptr/ptrtoint + // which fall through here. + if (SrcTy->isPointerTy()) + SrcBitWidth = TD->getTypeSizeInBits(SrcTy); + else + SrcBitWidth = SrcTy->getScalarSizeInBits(); + + APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); + KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + KnownZero = KnownZero.zextOrTrunc(BitWidth); + KnownOne = KnownOne.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::BitCast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + // TODO: For now, not handling conversions like: + // (bitcast i64 %x to <2 x i32>) + !I->getType()->isVectorTy()) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + return; + } + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. 
+ unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt MaskIn = Mask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set + KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::Shl: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + APInt Mask2(Mask.lshr(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero <<= ShiftAmt; + KnownOne <<= ShiftAmt; + KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 + return; + } + break; + case Instruction::LShr: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + + // Unsigned shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + // high bits known zero. + KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + return; + } + break; + case Instruction::AShr: + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + // Compute the new bits that are at the top now. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + + // Signed shift right. + APInt Mask2(Mask.shl(ShiftAmt)); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); + KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); + if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. + KnownZero |= HighBits; + else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. + KnownOne |= HighBits; + return; + } + break; + case Instruction::Sub: { + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. 
+ if (!CLHS->getValue().isNegative()) { + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, + TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through + case Instruction::Add: { + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. + if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (I->getOpcode() == Instruction::Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + + // Are we still trying to solve for the sign bit? + if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ + OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I); + if (OBO->hasNoSignedWrap()) { + if (I->getOpcode() == Instruction::Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. 
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } + + return; + } + case Instruction::SRem: + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, + Depth+1); + + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. + if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + + KnownZero &= Mask; + KnownOne &= Mask; + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + } + } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. + if (Mask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } + + break; + case Instruction::URem: { + if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { + APInt RA = Rem->getValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, + TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, + TD, Depth+1); + + unsigned Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clearAllBits(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + break; + } + + case Instruction::Alloca: { + AllocaInst *AI = cast<AllocaInst>(V); + unsigned Align = AI->getAlignment(); + if (Align == 0 && TD) + Align = TD->getABITypeAlignment(AI->getType()->getElementType()); + + if (Align > 0) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + break; + } + case Instruction::GetElementPtr: { + // Analyze all of the subscripts of this getelementptr instruction + // to determine if we can prove known low zero bits. 
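As an aside, the trailing-zero accounting in the getelementptr case can be sketched for a single base-plus-scaled-index step; trailZ and gepTrailZ below are hypothetical helpers, not LLVM API:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Hypothetical helper: trailing zero bits of a value (64 for zero).
static unsigned trailZ(uint64_t V) {
  if (V == 0) return 64;
  unsigned N = 0;
  while ((V & 1) == 0) { V >>= 1; ++N; }
  return N;
}

// Known trailing zeros of Base + Index*Size, given how many trailing zero
// bits are known for Base and for Index.
static unsigned gepTrailZ(unsigned BaseTZ, unsigned IndexTZ, uint64_t Size) {
  return std::min(BaseTZ, IndexTZ + trailZ(Size));
}

int main() {
  // A 16-byte-aligned base indexed by an arbitrary value into 8-byte elements
  // still yields at least 3 known trailing zero bits (8-byte alignment).
  assert(gepTrailZ(/*BaseTZ=*/4, /*IndexTZ=*/0, /*Size=*/8) == 3);
  // If the index is known to be even, one more zero bit survives.
  assert(gepTrailZ(4, 1, 8) == 4);
  return 0;
}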
+ APInt LocalMask = APInt::getAllOnesValue(BitWidth); + APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + + gep_type_iterator GTI = gep_type_begin(I); + for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { + Value *Index = I->getOperand(i); + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + // Handle struct member offset arithmetic. + if (!TD) return; + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + uint64_t Offset = SL->getElementOffset(Idx); + TrailZ = std::min(TrailZ, + CountTrailingZeros_64(Offset)); + } else { + // Handle array index arithmetic. + const Type *IndexedTy = GTI.getIndexedType(); + if (!IndexedTy->isSized()) return; + unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); + uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; + LocalMask = APInt::getAllOnesValue(GEPOpiBits); + LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); + ComputeMaskedBits(Index, LocalMask, + LocalKnownZero, LocalKnownOne, TD, Depth+1); + TrailZ = std::min(TrailZ, + unsigned(CountTrailingZeros_64(TypeSize) + + LocalKnownZero.countTrailingOnes())); + } + } + + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + break; + } + case Instruction::PHI: { + PHINode *P = cast<PHINode>(I); + // Handle the case of a simple two-predecessor recurrence PHI. + // There's a lot more that could theoretically be done here, but + // this is sufficient to catch some interesting cases. + if (P->getNumIncomingValues() == 2) { + for (unsigned i = 0; i != 2; ++i) { + Value *L = P->getIncomingValue(i); + Value *R = P->getIncomingValue(!i); + Operator *LU = dyn_cast<Operator>(L); + if (!LU) + continue; + unsigned Opcode = LU->getOpcode(); + // Check for operations that have the property that if + // both their operands have low zero bits, the result + // will have low zero bits. + if (Opcode == Instruction::Add || + Opcode == Instruction::Sub || + Opcode == Instruction::And || + Opcode == Instruction::Or || + Opcode == Instruction::Mul) { + Value *LL = LU->getOperand(0); + Value *LR = LU->getOperand(1); + // Find a recurrence. + if (LL == I) + L = LR; + else if (LR == I) + L = LL; + else + break; + // Ok, we have a PHI of the form L op= R. Check for low + // zero bits. + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + Mask2 = APInt::getLowBitsSet(BitWidth, + KnownZero2.countTrailingOnes()); + + // We need to take the minimum number of known bits + APInt KnownZero3(KnownZero), KnownOne3(KnownOne); + ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + + KnownZero = Mask & + APInt::getLowBitsSet(BitWidth, + std::min(KnownZero2.countTrailingOnes(), + KnownZero3.countTrailingOnes())); + break; + } + } + } + + // Unreachable blocks may have zero-operand PHI nodes. + if (P->getNumIncomingValues() == 0) + return; + + // Otherwise take the unions of the known bit sets of the operands, + // taking conservative care to avoid excessive recursion. + if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + // Skip if every incoming value references to ourself. 
+ if (P->hasConstantValue() == P) + break; + + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { + // Skip direct self references. + if (P->getIncomingValue(i) == P) continue; + + KnownZero2 = APInt(BitWidth, 0); + KnownOne2 = APInt(BitWidth, 0); + // Recurse, but cap the recursion to one level, because we don't + // want to waste time spinning around in loops. + ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, + KnownZero2, KnownOne2, TD, MaxDepth-1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + // If all bits have been ruled out, there's no need to check + // more operands. + if (!KnownZero && !KnownOne) + break; + } + } + break; + } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + case Intrinsic::x86_sse42_crc32_64_8: + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + break; + } + } + break; + } +} + +/// ComputeSignBit - Determine whether the sign bit is known to be zero or +/// one. Convenience wrapper around ComputeMaskedBits. +void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const TargetData *TD, unsigned Depth) { + unsigned BitWidth = getBitWidth(V->getType(), TD); + if (!BitWidth) { + KnownZero = false; + KnownOne = false; + return; + } + APInt ZeroBits(BitWidth, 0); + APInt OneBits(BitWidth, 0); + ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD, + Depth); + KnownOne = OneBits[BitWidth - 1]; + KnownZero = ZeroBits[BitWidth - 1]; +} + +/// isPowerOfTwo - Return true if the given value is known to have exactly one +/// bit set when defined. For vectors return true if every element is known to +/// be a power of two when defined. Supports values with integer or pointer +/// types and vectors of integers. +bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + + // 1 << X is clearly a power of two if the one is not shifted off the end. If + // it is shifted off the end then the result is undefined. + if (match(V, m_Shl(m_One(), m_Value()))) + return true; + + // (signbit) >>l X is clearly a power of two if the one is not shifted off the + // bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignBit(), m_Value()))) + return true; + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return false; + + if (ZExtInst *ZI = dyn_cast<ZExtInst>(V)) + return isPowerOfTwo(ZI->getOperand(0), TD, Depth); + + if (SelectInst *SI = dyn_cast<SelectInst>(V)) + return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && + isPowerOfTwo(SI->getFalseValue(), TD, Depth); + + // An exact divide or right shift can only shift off zero bits, so the result + // is a power of two only if the first operand is a power of two and not + // copying a sign bit (sdiv int_min, 2). 
+ if (match(V, m_LShr(m_Value(), m_Value())) || + match(V, m_UDiv(m_Value(), m_Value()))) { + PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V); + if (PEO->isExact()) + return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + } + + return false; +} + +/// isKnownNonZero - Return true if the given value is known to be non-zero +/// when defined. For vectors return true if every element is known to be +/// non-zero when defined. Supports values with integer or pointer type and +/// vectors of integers. +bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { + if (Constant *C = dyn_cast<Constant>(V)) { + if (C->isNullValue()) + return false; + if (isa<ConstantInt>(C)) + // Must be non-zero due to null test above. + return true; + // TODO: Handle vectors + return false; + } + + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth++ == MaxDepth) + return false; + + unsigned BitWidth = getBitWidth(V->getType(), TD); + + // X | Y != 0 if X != 0 or Y != 0. + Value *X = 0, *Y = 0; + if (match(V, m_Or(m_Value(X), m_Value(Y)))) + return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); + + // ext X != 0 if X != 0. + if (isa<SExtInst>(V) || isa<ZExtInst>(V)) + return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth); + + // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined + // if the lowest bit is shifted off the end. + if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + // shl nuw can't remove any non-zero bits. + BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->hasNoUnsignedWrap()) + return isKnownNonZero(X, TD, Depth); + + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); + if (KnownOne[0]) + return true; + } + // shr X, Y != 0 if X is negative. Note that the value of the shift is not + // defined if the sign bit is shifted off the end. + else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { + // shr exact can only shift out zero bits. + BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + + bool XKnownNonNegative, XKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + if (XKnownNegative) + return true; + } + // div exact can only produce a zero if the dividend is zero. + else if (match(V, m_IDiv(m_Value(X), m_Value()))) { + BinaryOperator *BO = cast<BinaryOperator>(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + } + // X + Y. + else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + bool XKnownNonNegative, XKnownNegative; + bool YKnownNonNegative, YKnownNegative; + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth); + + // If X and Y are both non-negative (as signed values) then their sum is not + // zero unless both X and Y are zero. + if (XKnownNonNegative && YKnownNonNegative) + if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth)) + return true; + + // If X and Y are both negative (as signed values) then their sum is not + // zero unless both X and Y equal INT_MIN. + if (BitWidth && XKnownNegative && YKnownNegative) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + APInt Mask = APInt::getSignedMaxValue(BitWidth); + // The sign bit of X is set. If some other bit is set then X is not equal + // to INT_MIN. 
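As an aside, a small standalone check of why this INT_MIN special case matters, using 8-bit values for brevity:

#include <cassert>
#include <cstdint>

int main() {
  // In 8-bit two's complement the only pair of negative values whose sum
  // wraps to zero is INT8_MIN + INT8_MIN; any other negative pair is nonzero.
  uint8_t A = 0x80, B = 0x80;          // both encode INT8_MIN (-128)
  assert(uint8_t(A + B) == 0);         // wraps to zero
  uint8_t C = 0x80, D = 0x81;          // -128 and -127
  assert(uint8_t(C + D) != 0);
  return 0;
}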
+ ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + // The sign bit of Y is set. If some other bit is set then Y is not equal + // to INT_MIN. + ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth); + if ((KnownOne & Mask) != 0) + return true; + } + + // The sum of a non-negative number and a power of two is not zero. + if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth)) + return true; + if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) + return true; + } + // (C ? X : Y) != 0 if X != 0 and Y != 0. + else if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && + isKnownNonZero(SI->getFalseValue(), TD, Depth)) + return true; + } + + if (!BitWidth) return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, + TD, Depth); + return KnownOne != 0; +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, + const TargetData *TD, unsigned Depth) { + APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + return (KnownZero & Mask) == Mask; +} + + + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "ashr X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +/// +/// 'Op' must have a scalar integer type. +/// +unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, + unsigned Depth) { + assert((TD || V->getType()->isIntOrIntVectorTy()) && + "ComputeNumSignBits requires a TargetData object to operate " + "on non-integer values!"); + const Type *Ty = V->getType(); + unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : + Ty->getScalarSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + // Note that ConstantInt is handled by the general ComputeMaskedBits case + // below. + + if (Depth == 6) + return 1; // Limit search depth. + + Operator *U = dyn_cast<Operator>(V); + switch (Operator::getOpcode(V)) { + default: break; + case Instruction::SExt: + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); + return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; + + case Instruction::AShr: + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + // ashr X, C -> adds C sign bits. 
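As an aside, a standalone sketch of what "sign bits" means here and of the ashr rule just noted; numSignBits below is a hypothetical helper, not the function being defined:

#include <cassert>
#include <cstdint>

// Hypothetical helper: leading bits equal to (and including) the sign bit.
static unsigned numSignBits(int32_t V) {
  uint32_t U = (uint32_t)V;
  unsigned Sign = (U >> 31) & 1;
  unsigned N = 1;
  while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  int32_t X = -300;                 // needs 10 bits, so 23 replicated sign bits
  assert(numSignBits(X) == 23);
  // An arithmetic shift right by 2 replicates the sign bit twice more
  // (relying on the usual arithmetic >> for signed values, as on Clang/GCC).
  assert(numSignBits(X >> 2) == 25);
  return 0;
}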
+ if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + // vector ashr X, <C, C, C, C> -> adds C sign bits + if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) { + if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) { + Tmp += CI->getZExtValue(); + if (Tmp > TyBits) Tmp = TyBits; + } + } + return Tmp; + case Instruction::Shl: + if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (C->getZExtValue() >= TyBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // ComputeMaskedBits, and pick whichever answer is better. + } + break; + + case Instruction::Select: + Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); + return std::min(Tmp, Tmp2); + + case Instruction::Add: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)) == Mask) + return TyBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case Instruction::Sub: + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, + TD, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(TyBits, 1)) == Mask) + return TyBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + if (Tmp == 1) return 1; // Early out. 
+ return std::min(Tmp, Tmp2)-1; + + case Instruction::PHI: { + PHINode *PN = cast<PHINode>(U); + // Don't analyze large in-degree PHIs. + if (PN->getNumIncomingValues() > 4) break; + + // Take the minimum of all incoming values. This can't infinitely loop + // because of our depth threshold. + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1); + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + if (Tmp == 1) return Tmp; + Tmp = std::min(Tmp, + ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1)); + } + return Tmp; + } + + case Instruction::Trunc: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); + APInt Mask = APInt::getAllOnesValue(TyBits); + ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-TyBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); +} + +/// ComputeMultiple - This function computes the integer multiple of Base that +/// equals V. If successful, it returns true and returns the multiple in +/// Multiple. If unsuccessful, it returns false. It looks +/// through SExt instructions only if LookThroughSExt is true. +bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, + bool LookThroughSExt, unsigned Depth) { + const unsigned MaxDepth = 6; + + assert(V && "No Value?"); + assert(Depth <= MaxDepth && "Limit Search Depth"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); + + const Type *T = V->getType(); + + ConstantInt *CI = dyn_cast<ConstantInt>(V); + + if (Base == 0) + return false; + + if (Base == 1) { + Multiple = V; + return true; + } + + ConstantExpr *CO = dyn_cast<ConstantExpr>(V); + Constant *BaseVal = ConstantInt::get(T, Base); + if (CO && CO == BaseVal) { + // Multiple is 1. + Multiple = ConstantInt::get(T, 1); + return true; + } + + if (CI && CI->getZExtValue() % Base == 0) { + Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); + return true; + } + + if (Depth == MaxDepth) return false; // Limit search depth. 
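+  // Example (illustrative): for V = "shl i32 %x, 3" and Base == 8, the shift
+  // is rewritten below as %x * 8; the constant factor 8 is itself a multiple
+  // of Base, so Multiple is set to %x and the function returns true.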
+ + Operator *I = dyn_cast<Operator>(V); + if (!I) return false; + + switch (I->getOpcode()) { + default: break; + case Instruction::SExt: + if (!LookThroughSExt) return false; + // otherwise fall through to ZExt + case Instruction::ZExt: + return ComputeMultiple(I->getOperand(0), Base, Multiple, + LookThroughSExt, Depth+1); + case Instruction::Shl: + case Instruction::Mul: { + Value *Op0 = I->getOperand(0); + Value *Op1 = I->getOperand(1); + + if (I->getOpcode() == Instruction::Shl) { + ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1); + if (!Op1CI) return false; + // Turn Op0 << Op1 into Op0 * 2^Op1 + APInt Op1Int = Op1CI->getValue(); + uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1); + APInt API(Op1Int.getBitWidth(), 0); + API.setBit(BitToSet); + Op1 = ConstantInt::get(V->getContext(), API); + } + + Value *Mul0 = NULL; + if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { + if (Constant *Op1C = dyn_cast<Constant>(Op1)) + if (Constant *MulC = dyn_cast<Constant>(Mul0)) { + if (Op1C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); + if (Op1C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); + + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) + Multiple = ConstantExpr::getMul(MulC, Op1C); + return true; + } + + if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0)) + if (Mul0CI->getValue() == 1) { + // V == Base * Op1, so return Op1 + Multiple = Op1; + return true; + } + } + + Value *Mul1 = NULL; + if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { + if (Constant *Op0C = dyn_cast<Constant>(Op0)) + if (Constant *MulC = dyn_cast<Constant>(Mul1)) { + if (Op0C->getType()->getPrimitiveSizeInBits() < + MulC->getType()->getPrimitiveSizeInBits()) + Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); + if (Op0C->getType()->getPrimitiveSizeInBits() > + MulC->getType()->getPrimitiveSizeInBits()) + MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); + + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) + Multiple = ConstantExpr::getMul(MulC, Op0C); + return true; + } + + if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1)) + if (Mul1CI->getValue() == 1) { + // V == Base * Op0, so return Op0 + Multiple = Op0; + return true; + } + } + } + } + + // We could not determine if V is a multiple of Base. + return false; +} + +/// CannotBeNegativeZero - Return true if we can prove that the specified FP +/// value is never equal to -0.0. +/// +/// NOTE: this function will need to be revisited when we support non-default +/// rounding modes! +/// +bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) + return !CFP->getValueAPF().isNegZero(); + + if (Depth == 6) + return 1; // Limit search depth. + + const Operator *I = dyn_cast<Operator>(V); + if (I == 0) return false; + + // (add x, 0.0) is guaranteed to return +0.0, not -0.0. + if (I->getOpcode() == Instruction::FAdd && + isa<ConstantFP>(I->getOperand(1)) && + cast<ConstantFP>(I->getOperand(1))->isNullValue()) + return true; + + // sitofp and uitofp turn into +0.0 for zero. + if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + // sqrt(-0.0) = -0.0, no other negative results are possible. 
+ if (II->getIntrinsicID() == Intrinsic::sqrt) + return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); + + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (const Function *F = CI->getCalledFunction()) { + if (F->isDeclaration()) { + // abs(x) != -0.0 + if (F->getName() == "abs") return true; + // fabs[lf](x) != -0.0 + if (F->getName() == "fabs") return true; + if (F->getName() == "fabsf") return true; + if (F->getName() == "fabsl") return true; + if (F->getName() == "sqrt" || F->getName() == "sqrtf" || + F->getName() == "sqrtl") + return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); + } + } + + return false; +} + +/// isBytewiseValue - If the specified value can be set by repeating the same +/// byte in memory, return the i8 value that it is represented with. This is +/// true for all i8 values obviously, but is also true for i32 0, i32 -1, +/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated +/// byte store (e.g. i16 0x1234), return null. +Value *llvm::isBytewiseValue(Value *V) { + // All byte-wide stores are splatable, even of arbitrary variables. + if (V->getType()->isIntegerTy(8)) return V; + + // Handle 'null' ConstantArrayZero etc. + if (Constant *C = dyn_cast<Constant>(V)) + if (C->isNullValue()) + return Constant::getNullValue(Type::getInt8Ty(V->getContext())); + + // Constant float and double values can be handled as integer values if the + // corresponding integer value is "byteable". An important case is 0.0. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { + if (CFP->getType()->isFloatTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); + if (CFP->getType()->isDoubleTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); + // Don't handle long double formats, which have strange constraints. + } + + // We can handle constant integers that are power of two in size and a + // multiple of 8 bits. + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + unsigned Width = CI->getBitWidth(); + if (isPowerOf2_32(Width) && Width > 8) { + // We can handle this value if the recursive binary decomposition is the + // same at all levels. + APInt Val = CI->getValue(); + APInt Val2; + while (Val.getBitWidth() != 8) { + unsigned NextWidth = Val.getBitWidth()/2; + Val2 = Val.lshr(NextWidth); + Val2 = Val2.trunc(Val.getBitWidth()/2); + Val = Val.trunc(Val.getBitWidth()/2); + + // If the top/bottom halves aren't the same, reject it. + if (Val != Val2) + return 0; + } + return ConstantInt::get(V->getContext(), Val); + } + } + + // A ConstantArray is splatable if all its members are equal and also + // splatable. + if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + if (CA->getNumOperands() == 0) + return 0; + + Value *Val = isBytewiseValue(CA->getOperand(0)); + if (!Val) + return 0; + + for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I) + if (CA->getOperand(I-1) != CA->getOperand(I)) + return 0; + + return Val; + } + + // Conceptually, we could handle things like: + // %a = zext i8 %X to i16 + // %b = shl i16 %a, 8 + // %c = or i16 %a, %b + // but until there is an example that actually needs this, it doesn't seem + // worth worrying about. + return 0; +} + + +// This is the recursive version of BuildSubAggregate. It takes a few different +// arguments. Idxs is the index within the nested struct From that we are +// looking at now (which is of type IndexedType). IdxSkip is the number of +// indices from Idxs that should be left out when inserting into the resulting +// struct. 
To is the result struct built so far; new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+                                SmallVector<unsigned, 10> &Idxs,
+                                unsigned IdxSkip,
+                                Instruction *InsertBefore) {
+  const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+  if (STy) {
+    // Save the original To argument so we can modify it
+    Value *OrigTo = To;
+    // General case, the type indexed by Idxs is a struct
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      // Process each struct element recursively
+      Idxs.push_back(i);
+      Value *PrevTo = To;
+      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+                             InsertBefore);
+      Idxs.pop_back();
+      if (!To) {
+        // Couldn't find any inserted value for this index? Cleanup
+        while (PrevTo != OrigTo) {
+          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+          PrevTo = Del->getAggregateOperand();
+          Del->eraseFromParent();
+        }
+        // Stop processing elements
+        break;
+      }
+    }
+    // If we successfully found a value for each of our subaggregates
+    if (To)
+      return To;
+  }
+  // Base case, the type indexed by Idxs is not a struct, or not all of the
+  // struct's elements had a value that was inserted directly. In the latter
+  // case, perhaps we can't determine each of the subelements individually, but
+  // we might be able to find the complete struct somewhere.
+
+  // Find the value that is at that particular spot
+  Value *V = FindInsertedValue(From, Idxs);
+
+  if (!V)
+    return NULL;
+
+  // Insert the value in the new (sub) aggregate
+  return llvm::InsertValueInst::Create(To, V,
+                                       ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+                                       "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+//   { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+//   { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct is known (i.e., inserted into From by
+// an insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore.
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
+                                Instruction *InsertBefore) {
+  assert(InsertBefore && "Must have someplace to insert!");
+  const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+                                                             idx_range);
+  Value *To = UndefValue::get(IndexedType);
+  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
+  unsigned IdxSkip = Idxs.size();
+
+  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+                               Instruction *InsertBefore) {
+  // Nothing to index? Just return V then (this is useful at the end of our
+  // recursion).
+  if (idx_range.empty())
+    return V;
+  // We have indices, so V should have an indexable type.
+  assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+         && "Not looking at a struct or array?");
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
+         && "Invalid indices for type?");
+  const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+  if (isa<UndefValue>(V))
+    return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+                                                            idx_range));
+  else if (isa<ConstantAggregateZero>(V))
+    return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+                                                                   idx_range));
+  else if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+      // Recursively process this constant
+      return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+                               InsertBefore);
+  } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+    // Loop over the indices for the insertvalue instruction in parallel with
+    // the requested indices.
+    const unsigned *req_idx = idx_range.begin();
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i, ++req_idx) {
+      if (req_idx == idx_range.end()) {
+        if (InsertBefore)
+          // The requested index identifies a part of a nested aggregate. Handle
+          // this specially. For example,
+          // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+          // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+          // %C = extractvalue {i32, { i32, i32 } } %B, 1
+          // This can be changed into
+          // %A = insertvalue {i32, i32 } undef, i32 10, 0
+          // %C = insertvalue {i32, i32 } %A, i32 11, 1
+          // which allows the unused 0,0 element from the nested struct to be
+          // removed.
+          return BuildSubAggregate(V,
+                                   ArrayRef<unsigned>(idx_range.begin(),
+                                                      req_idx),
+                                   InsertBefore);
+        else
+          // We can't handle this without inserting insertvalues
+          return 0;
+      }
+
+      // This insertvalue inserts something other than what we are looking for.
+      // See if the (aggregate) value being inserted into contains the value we
+      // are looking for instead.
+      if (*req_idx != *i)
+        return FindInsertedValue(I->getAggregateOperand(), idx_range,
+                                 InsertBefore);
+    }
+    // If we end up here, the indices of the insertvalue match with those
+    // requested (though possibly only partially). Now we recursively look at
+    // the inserted value, passing any remaining indices.
+    return FindInsertedValue(I->getInsertedValueOperand(),
+                             ArrayRef<unsigned>(req_idx, idx_range.end()),
+                             InsertBefore);
+  } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+    // something else, we can extract from that something else directly instead.
+    // However, we will need to chain I's indices with the requested indices.
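+    // For example (illustrative): if I is
+    //   %I = extractvalue { { i32, i32 }, i32 } %agg, 0
+    // and the caller asked for index 1 of %I, we recurse on %agg with the
+    // chained indices 0, 1.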
+ + // Calculate the number of indices required + unsigned size = I->getNumIndices() + idx_range.size(); + // Allocate some space to put the new indices in + SmallVector<unsigned, 5> Idxs; + Idxs.reserve(size); + // Add indices from the extract value instruction + Idxs.append(I->idx_begin(), I->idx_end()); + + // Add requested indices + Idxs.append(idx_range.begin(), idx_range.end()); + + assert(Idxs.size() == size + && "Number of indices added not correct?"); + + return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); + } + // Otherwise, we don't know (such as, extracting from a function return value + // or load instruction) + return 0; +} + +/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if +/// it can be expressed as a base pointer plus a constant offset. Return the +/// base and offset to the caller. +Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast<Operator>(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast<ConstantInt>(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. + if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// GetConstantStringInfo - This function computes the length of a +/// null-terminated C string pointed to by V. If successful, it returns true +/// and returns the string in Str. If unsuccessful, it returns false. +bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, + uint64_t Offset, + bool StopAtNul) { + // If V is NULL then return false; + if (V == NULL) return false; + + // Look through bitcast instructions. + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul); + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return false because ConstantArray can't occur + // any other way + const User *GEP = 0; + if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + GEP = GEPI; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() == Instruction::BitCast) + return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul); + if (CE->getOpcode() != Instruction::GetElementPtr) + return false; + GEP = CE; + } + + if (GEP) { + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return false; + + // Make sure the index-ee is a pointer to array of i8. 
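+  // That is, we only handle GEPs of the shape typically produced for string
+  // literals, e.g. (illustrative):
+  //   getelementptr [13 x i8]* @.str, i64 0, i64 0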
+ const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); + const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) + return false; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1)); + if (FirstIdx == 0 || !FirstIdx->isZero()) + return false; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return false; + return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset, + StopAtNul); + } + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + const GlobalVariable* GV = dyn_cast<GlobalVariable>(V); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) + return false; + const Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case + if (isa<ConstantAggregateZero>(GlobalInit)) { + // This is a degenerate case. The initializer is constant zero so the + // length of the string must be zero. + Str.clear(); + return true; + } + + // Must be a Constant Array + const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + if (Offset > NumElts) + return false; + + // Traverse the constant array from 'Offset' which is the place the GEP refers + // to in the array. + Str.reserve(NumElts-Offset); + for (unsigned i = Offset; i != NumElts; ++i) { + const Constant *Elt = Array->getOperand(i); + const ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return false; + if (StopAtNul && CI->isZero()) + return true; // we found end of string, success! + Str += (char)CI->getZExtValue(); + } + + // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. + return true; +} + +// These next two are very similar to the above, but also look through PHI +// nodes. +// TODO: See if we can integrate these two together. + +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { + // Look through noop bitcast instructions. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + return GetStringLengthH(BCI->getOperand(0), PHIs); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. 
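+      // A length of ~0ULL means this incoming value cycles back into the PHI
+      // itself; it does not constrain the result, so it is skipped below.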
+ + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return unknown. + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() != Instruction::GetElementPtr) + return 0; + GEP = CE; + } else { + return 0; + } + + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return 0; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { + if (!Idx->isZero()) + return 0; + } else + return 0; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return 0; + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GV->mayBeOverridden()) + return 0; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case, which is a degenerate case. The + // initializer is constant zero so the length of the string must be zero. + if (isa<ConstantAggregateZero>(GlobalInit)) + return 1; // Len = 0 offset by 1. + + // Must be a Constant Array + ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + // Traverse the constant array from StartIdx (derived above) which is + // the place the GEP refers to in the array. + for (unsigned i = StartIdx; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return 0; + if (CI->isZero()) + return i-StartIdx+1; // We found end of string, success! + } + + return 0; // The array isn't null terminated, conservatively return 'unknown'. +} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +uint64_t llvm::GetStringLength(Value *V) { + if (!V->getType()->isPointerTy()) return 0; + + SmallPtrSet<PHINode*, 32> PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. 
+  return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      // See if InstructionSimplify knows any relevant tricks.
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        // TODO: Acquire a DominatorTree and use it.
+        if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+          V = Simplified;
+          continue;
+        }
+
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+       UI != UE; ++UI) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) return false;
+
+    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+        II->getIntrinsicID() != Intrinsic::lifetime_end)
+      return false;
+  }
+  return true;
+}