author:    dim <dim@FreeBSD.org>    2011-10-20 21:10:27 +0000
committer: dim <dim@FreeBSD.org>    2011-10-20 21:10:27 +0000
commit:    7b3392326c40c3c20697816acae597ba7b3144eb (patch)
tree:      2cbcf22585e99f8a87d12d5ff94f392c0d266819 /lib/Analysis
parent:    1176aa52646fe641a4243a246aa7f960c708a274 (diff)
Vendor import of llvm release_30 branch r142614:
http://llvm.org/svn/llvm-project/llvm/branches/release_30@142614
Diffstat (limited to 'lib/Analysis')
35 files changed, 2539 insertions, 1150 deletions
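A quick orientation before the diffs: the AliasAnalysis.cpp and AliasSetTracker.cpp hunks below teach the alias-analysis layer about the new first-class atomic instructions (ordered loads/stores, cmpxchg, atomicrmw). The ordering rule they apply can be illustrated standalone. The sketch below is not part of the commit; the enum mirrors the AtomicOrdering ranks in LLVM 3.0:

    // Minimal sketch (not from the commit): atomic orderings, weakest to
    // strongest, as ranked in LLVM 3.0.
    enum AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                          AcquireRelease, SequentiallyConsistent };

    // Mirrors the checks added below: an atomic operation ordered stronger
    // than Monotonic has synchronization effects for arbitrary addresses,
    // so getModRefInfo must answer ModRef without even looking at the
    // pointer operand.
    bool ordersOtherMemory(AtomicOrdering Ord) { return Ord > Monotonic; }

    // A load or store is "unordered" (safe for ordinary alias reasoning)
    // only when it is non-volatile and at most Unordered -- this is the
    // !isUnordered() test that replaces the old isVolatile() check.
    bool isUnordered(AtomicOrdering Ord, bool IsVolatile) {
      return Ord <= Unordered && !IsVolatile;
    }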
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index c189a00..bd132c0 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -237,6 +237,19 @@ AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
                   VI->getMetadata(LLVMContext::MD_tbaa));
 }

+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+  return Location(CXI->getPointerOperand(),
+                  getTypeStoreSize(CXI->getCompareOperand()->getType()),
+                  CXI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+  return Location(RMWI->getPointerOperand(),
+                  getTypeStoreSize(RMWI->getValOperand()->getType()),
+                  RMWI->getMetadata(LLVMContext::MD_tbaa));
+}

 AliasAnalysis::Location
 AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
@@ -268,8 +281,8 @@ AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {

 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
-  // Be conservative in the face of volatile.
-  if (L->isVolatile())
+  // Be conservative in the face of volatile/atomic.
+  if (!L->isUnordered())
     return ModRef;

   // If the load address doesn't alias the given address, it doesn't read
@@ -283,8 +296,8 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {

 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
-  // Be conservative in the face of volatile.
-  if (S->isVolatile())
+  // Be conservative in the face of volatile/atomic.
+  if (!S->isUnordered())
     return ModRef;

   // If the store address cannot alias the pointer in question, then the
@@ -317,6 +330,33 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
   return ModRef;
 }

+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
+  // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
+  if (CX->getOrdering() > Monotonic)
+    return ModRef;
+
+  // If the cmpxchg address does not alias the location, it does not access it.
+  if (!alias(getLocation(CX), Loc))
+    return NoModRef;
+
+  return ModRef;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
+  // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
+  if (RMW->getOrdering() > Monotonic)
+    return ModRef;
+
+  // If the atomicrmw address does not alias the location, it does not access it.
+  if (!alias(getLocation(RMW), Loc))
+    return NoModRef;
+
+  return ModRef;
+}
+
+
 // AliasAnalysis destructor: DO NOT move this to the header file for
 // AliasAnalysis or else clients of the AliasAnalysis class may not depend on
 // the AliasAnalysis.o file in the current .a file, causing alias analysis
@@ -341,7 +381,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 /// getTypeStoreSize - Return the TargetData store size for the given type,
 /// if known, or a conservative value otherwise.
 ///
-uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
   return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
 }
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 1afc1b7..37271b9 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -171,12 +171,12 @@ bool AAEval::runOnFunction(Function &F) {
   for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
     uint64_t I1Size = AliasAnalysis::UnknownSize;
-    const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+    Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
     if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);

     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       uint64_t I2Size = AliasAnalysis::UnknownSize;
-      const Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
+      Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
       if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);

       switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
@@ -207,7 +207,7 @@ bool AAEval::runOnFunction(Function &F) {
     for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
          V != Ve; ++V) {
       uint64_t Size = AliasAnalysis::UnknownSize;
-      const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+      Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
       if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);

       switch (AA.getModRefInfo(*C, *V, Size)) {
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 2ed6949..3fcd3b5 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -56,12 +56,12 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
     AliasTy = MayAlias;
   }

-  if (CallSites.empty()) {            // Merge call sites...
-    if (!AS.CallSites.empty())
-      std::swap(CallSites, AS.CallSites);
-  } else if (!AS.CallSites.empty()) {
-    CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
-    AS.CallSites.clear();
+  if (UnknownInsts.empty()) {            // Merge call sites...
+    if (!AS.UnknownInsts.empty())
+      std::swap(UnknownInsts, AS.UnknownInsts);
+  } else if (!AS.UnknownInsts.empty()) {
+    UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end());
+    AS.UnknownInsts.clear();
   }

   AS.Forward = this;  // Forward across AS now...
@@ -123,13 +123,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
   addRef();               // Entry points to alias set.
 }

-void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
-  CallSites.push_back(CS.getInstruction());
+void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
+  UnknownInsts.push_back(I);

-  AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
-  if (Behavior == AliasAnalysis::DoesNotAccessMemory)
-    return;
-  if (AliasAnalysis::onlyReadsMemory(Behavior)) {
+  if (!I->mayWriteToMemory()) {
     AliasTy = MayAlias;
     AccessTy |= Refs;
     return;
@@ -147,7 +144,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
                               const MDNode *TBAAInfo,
                               AliasAnalysis &AA) const {
   if (AliasTy == MustAlias) {
-    assert(CallSites.empty() && "Illegal must alias set!");
+    assert(UnknownInsts.empty() && "Illegal must alias set!");

     // If this is a set of MustAliases, only check to see if the pointer aliases
     // SOME value in the set.
@@ -167,10 +164,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
                       I.getTBAAInfo())))
       return true;

-  // Check the call sites list and invoke list...
-  if (!CallSites.empty()) {
-    for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
-      if (AA.getModRefInfo(CallSites[i],
+  // Check the unknown instructions...
+  if (!UnknownInsts.empty()) {
+    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
+      if (AA.getModRefInfo(UnknownInsts[i],
                            AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
             AliasAnalysis::NoModRef)
         return true;
@@ -179,18 +176,20 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
   return false;
 }

-bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
-  if (AA.doesNotAccessMemory(CS))
+bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+  if (!Inst->mayReadOrWriteMemory())
     return false;

-  for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
-    if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
-        AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
+  for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
+    CallSite C1 = getUnknownInst(i), C2 = Inst;
+    if (!C1 || !C2 ||
+        AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
+        AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
       return true;
   }

   for (iterator I = begin(), E = end(); I != E; ++I)
-    if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
+    if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) !=
           AliasAnalysis::NoModRef)
       return true;

@@ -244,10 +243,10 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,



-AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
+AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
   AliasSet *FoundSet = 0;
   for (iterator I = begin(), E = end(); I != E; ++I) {
-    if (I->Forward || !I->aliasesCallSite(CS, AA))
+    if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
       continue;

     if (FoundSet == 0)        // If this is the first alias set ptr can go into.
@@ -296,22 +295,28 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {

 bool AliasSetTracker::add(LoadInst *LI) {
+  if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+  AliasSet::AccessType ATy = AliasSet::Refs;
+  if (!LI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
                             LI->getMetadata(LLVMContext::MD_tbaa),
-                            AliasSet::Refs, NewPtr);
+                            ATy, NewPtr);
   if (LI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }

 bool AliasSetTracker::add(StoreInst *SI) {
+  if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+  AliasSet::AccessType ATy = AliasSet::Mods;
+  if (!SI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
                             AA.getTypeStoreSize(Val->getType()),
                             SI->getMetadata(LLVMContext::MD_tbaa),
-                            AliasSet::Mods, NewPtr);
+                            ATy, NewPtr);
   if (SI->isVolatile()) AS.setVolatile();
   return NewPtr;
 }

@@ -325,20 +330,20 @@ bool AliasSetTracker::add(VAArgInst *VAAI) {
 }


-bool AliasSetTracker::add(CallSite CS) {
-  if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
+bool AliasSetTracker::addUnknown(Instruction *Inst) {
+  if (isa<DbgInfoIntrinsic>(Inst))
     return true; // Ignore DbgInfo Intrinsics.
-  if (AA.doesNotAccessMemory(CS))
+  if (!Inst->mayReadOrWriteMemory())
     return true; // doesn't alias anything

-  AliasSet *AS = findAliasSetForCallSite(CS);
+  AliasSet *AS = findAliasSetForUnknownInst(Inst);
   if (AS) {
-    AS->addCallSite(CS, AA);
+    AS->addUnknownInst(Inst, AA);
     return false;
   }
   AliasSets.push_back(new AliasSet());
   AS = &AliasSets.back();
-  AS->addCallSite(CS, AA);
+  AS->addUnknownInst(Inst, AA);
   return true;
 }

@@ -348,13 +353,9 @@ bool AliasSetTracker::add(Instruction *I) {
     return add(LI);
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return add(SI);
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    return add(CI);
-  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
-    return add(II);
   if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
     return add(VAAI);
-  return true;
+  return addUnknown(I);
 }

 void AliasSetTracker::add(BasicBlock &BB) {
@@ -375,8 +376,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
     AliasSet &AS = const_cast<AliasSet&>(*I);

     // If there are any call sites in the alias set, add them to this AST.
-    for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
-      add(AS.CallSites[i]);
+    for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
+      add(AS.UnknownInsts[i]);

     // Loop over all of the pointers in this alias set.
     bool X;
@@ -393,7 +394,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
 /// tracker.
 void AliasSetTracker::remove(AliasSet &AS) {
   // Drop all call sites.
-  AS.CallSites.clear();
+  AS.UnknownInsts.clear();

   // Clear the alias set.
   unsigned NumRefs = 0;
@@ -453,11 +454,11 @@ bool AliasSetTracker::remove(VAArgInst *VAAI) {
   return true;
 }

-bool AliasSetTracker::remove(CallSite CS) {
-  if (AA.doesNotAccessMemory(CS))
+bool AliasSetTracker::removeUnknown(Instruction *I) {
+  if (!I->mayReadOrWriteMemory())
     return false; // doesn't alias anything

-  AliasSet *AS = findAliasSetForCallSite(CS);
+  AliasSet *AS = findAliasSetForUnknownInst(I);
   if (!AS) return false;
   remove(*AS);
   return true;
@@ -469,11 +470,9 @@ bool AliasSetTracker::remove(Instruction *I) {
     return remove(LI);
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return remove(SI);
-  if (CallInst *CI = dyn_cast<CallInst>(I))
-    return remove(CI);
   if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
     return remove(VAAI);
-  return true;
+  return removeUnknown(I);
 }


@@ -488,13 +487,13 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {

   // If this is a call instruction, remove the callsite from the appropriate
   // AliasSet (if present).
-  if (CallSite CS = PtrVal) {
-    if (!AA.doesNotAccessMemory(CS)) {
+  if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
+    if (Inst->mayReadOrWriteMemory()) {
       // Scan all the alias sets to see if this call site is contained.
       for (iterator I = begin(), E = end(); I != E; ++I) {
         if (I->Forward) continue;

-        I->removeCallSite(CS);
+        I->removeUnknownInst(Inst);
       }
     }
   }
@@ -571,11 +570,11 @@ void AliasSet::print(raw_ostream &OS) const {
       OS << ", " << I.getSize() << ")";
     }
   }
-  if (!CallSites.empty()) {
-    OS << "\n    " << CallSites.size() << " Call Sites: ";
-    for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+  if (!UnknownInsts.empty()) {
+    OS << "\n    " << UnknownInsts.size() << " Unknown instructions: ";
+    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
       if (i) OS << ", ";
-      WriteAsOperand(OS, CallSites[i]);
+      WriteAsOperand(OS, UnknownInsts[i]);
     }
   }
   OS << "\n";
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 71e0a83..0ba6af9 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm-c/Analysis.h"
+#include "llvm-c/Initialization.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Analysis/Verifier.h"
 #include <cstring>
@@ -23,7 +24,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeAliasSetPrinterPass(Registry);
   initializeNoAAPass(Registry);
   initializeBasicAliasAnalysisPass(Registry);
-  initializeBlockFrequencyPass(Registry);
+  initializeBlockFrequencyInfoPass(Registry);
   initializeBranchProbabilityInfoPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 8330ea7..af400ba 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -100,7 +101,7 @@ static bool isEscapeSource(const Value *V) {
 /// getObjectSize - Return the size of the object specified by V, or
 /// UnknownSize if unknown.
 static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
-  const Type *AccessTy;
+  Type *AccessTy;
   if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
     if (!GV->hasDefinitiveInitializer())
       return AliasAnalysis::UnknownSize;
@@ -317,7 +318,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
          E = GEPOp->op_end(); I != E; ++I) {
       Value *Index = *I;
       // Compute the (potentially symbolic) offset in bytes for this index.
-      if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+      if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
         // For a struct, add the member offset.
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
         if (FieldNo == 0) continue;
@@ -374,7 +375,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       }

       if (Scale) {
-        VariableGEPIndex Entry = {Index, Extension, Scale};
+        VariableGEPIndex Entry = {Index, Extension,
+                                  static_cast<int64_t>(Scale)};
         VarIndices.push_back(Entry);
       }
     }
@@ -467,6 +469,7 @@ namespace {

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<AliasAnalysis>();
+      AU.addRequired<TargetLibraryInfo>();
     }

     virtual AliasResult alias(const Location &LocA,
@@ -549,10 +552,15 @@ namespace {

 // Register this pass...
 char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+                   "Basic Alias Analysis (stateless AA impl)",
+                   false, true, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
                    "Basic Alias Analysis (stateless AA impl)",
                    false, true, false)

+
 ImmutablePass *llvm::createBasicAliasAnalysisPass() {
   return new BasicAliasAnalysis();
 }
@@ -706,7 +714,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
       // escape.
-      if (!isNoAlias(Location(cast<Value>(CI)), Loc)) {
+      if (!isNoAlias(Location(*CI), Location(Object))) {
         PassedAsArg = true;
         break;
       }
@@ -716,6 +724,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       return NoModRef;
   }

+  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
   ModRefResult Min = ModRef;

   // Finally, handle specific knowledge of intrinsics.
@@ -754,26 +763,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // We know that memset doesn't load anything.
       Min = Mod;
       break;
-    case Intrinsic::atomic_cmp_swap:
-    case Intrinsic::atomic_swap:
-    case Intrinsic::atomic_load_add:
-    case Intrinsic::atomic_load_sub:
-    case Intrinsic::atomic_load_and:
-    case Intrinsic::atomic_load_nand:
-    case Intrinsic::atomic_load_or:
-    case Intrinsic::atomic_load_xor:
-    case Intrinsic::atomic_load_max:
-    case Intrinsic::atomic_load_min:
-    case Intrinsic::atomic_load_umax:
-    case Intrinsic::atomic_load_umin:
-      if (TD) {
-        Value *Op1 = II->getArgOperand(0);
-        uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType());
-        MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa);
-        if (isNoAlias(Location(Op1, Op1Size, Tag), Loc))
-          return NoModRef;
-      }
-      break;
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start: {
@@ -818,6 +807,39 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     }
   }

+  // We can bound the aliasing properties of memset_pattern16 just as we can
+  // for memcpy/memset.  This is particularly important because the
+  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+  // whenever possible.
+  else if (TLI.has(LibFunc::memset_pattern16) &&
+           CS.getCalledFunction() &&
+           CS.getCalledFunction()->getName() == "memset_pattern16") {
+    const Function *MS = CS.getCalledFunction();
+    FunctionType *MemsetType = MS->getFunctionType();
+    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+        isa<PointerType>(MemsetType->getParamType(0)) &&
+        isa<PointerType>(MemsetType->getParamType(1)) &&
+        isa<IntegerType>(MemsetType->getParamType(2))) {
+      uint64_t Len = UnknownSize;
+      if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
+        Len = LenCI->getZExtValue();
+      const Value *Dest = CS.getArgument(0);
+      const Value *Src = CS.getArgument(1);
+      // If it can't overlap the source dest, then it doesn't modref the loc.
+      if (isNoAlias(Location(Dest, Len), Loc)) {
+        // Always reads 16 bytes of the source.
+        if (isNoAlias(Location(Src, 16), Loc))
+          return NoModRef;
+        // If it can't overlap the dest, then worst case it reads the loc.
+        Min = Ref;
+        // Always reads 16 bytes of the source.
+      } else if (isNoAlias(Location(Src, 16), Loc)) {
+        // If it can't overlap the source, then worst case it mutates the loc.
+        Min = Mod;
+      }
+    }
+  }
+
   // The AliasAnalysis base class has some smarts, lets use them.
   return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
@@ -913,43 +935,43 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty())
       return MustAlias;

-  // If there is a difference between the pointers, but the difference is
-  // less than the size of the associated memory object, then we know
-  // that the objects are partially overlapping.
+  // If there is a constant difference between the pointers, but the difference
+  // is less than the size of the associated memory object, then we know
+  // that the objects are partially overlapping.  If the difference is
+  // greater, we know they do not overlap.
   if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) {
-    if (GEP1BaseOffset >= 0 ?
-        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) :
-        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size &&
-         GEP1BaseOffset != INT64_MIN))
-      return PartialAlias;
+    if (GEP1BaseOffset >= 0) {
+      if (V2Size != UnknownSize) {
+        if ((uint64_t)GEP1BaseOffset < V2Size)
+          return PartialAlias;
+        return NoAlias;
+      }
+    } else {
+      if (V1Size != UnknownSize) {
+        if (-(uint64_t)GEP1BaseOffset < V1Size)
+          return PartialAlias;
+        return NoAlias;
+      }
+    }
   }

-  // If we have a known constant offset, see if this offset is larger than the
-  // access size being queried.  If so, and if no variable indices can remove
-  // pieces of this constant, then we know we have a no-alias.  For example,
-  //   &A[100] != &A.
-
-  // In order to handle cases like &A[100][i] where i is an out of range
-  // subscript, we have to ignore all constant offset pieces that are a multiple
-  // of a scaled index.  Do this by removing constant offsets that are a
-  // multiple of any of our variable indices.  This allows us to transform
-  // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1
-  // provides an offset of 4 bytes (assuming a <= 4 byte access).
-  for (unsigned i = 0, e = GEP1VariableIndices.size();
-       i != e && GEP1BaseOffset;++i)
-    if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
-      GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
-
-  // If our known offset is bigger than the access size, we know we don't have
-  // an alias.
-  if (GEP1BaseOffset) {
-    if (GEP1BaseOffset >= 0 ?
-        (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) :
-        (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size &&
-         GEP1BaseOffset != INT64_MIN))
+  // Try to distinguish something like &A[i][1] against &A[42][0].
+  // Grab the least significant bit set in any of the scales.
+  if (!GEP1VariableIndices.empty()) {
+    uint64_t Modulo = 0;
+    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
+      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+    Modulo = Modulo ^ (Modulo & (Modulo - 1));
+
+    // We can compute the difference between the two addresses
+    // mod Modulo. Check whether that difference guarantees that the
+    // two locations do not alias.
+    uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+    if (V1Size != UnknownSize && V2Size != UnknownSize &&
+        ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
       return NoAlias;
   }
-  
+
   // Statically, we can see that the base objects are the same, but the
   // pointers have dynamic offsets which we can't resolve. And none of our
   // little tricks above worked.
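The aliasGEP rewrite above replaces the old offset-cancellation loop with modular arithmetic: OR all variable-index scales together, isolate the lowest set bit, and compare the constant offset modulo that value against the access sizes. A standalone sketch of the arithmetic, with made-up scales and sizes (not part of the commit):

    #include <cassert>
    #include <cstdint>

    // Lowest set bit, exactly as computed above: x ^ (x & (x - 1)).
    static uint64_t lowestSetBit(uint64_t x) { return x ^ (x & (x - 1)); }

    int main() {
      // Hypothetical GEP with variable-index scales 8 and 16 and a constant
      // offset of 4 between the two pointers.
      uint64_t Modulo = lowestSetBit(8 | 16);  // 8|16 = 0b11000 -> 8
      uint64_t GEP1BaseOffset = 4;
      uint64_t V1Size = 4, V2Size = 4;         // two 4-byte accesses

      // The addresses are congruent to 0 and 4 (mod 8); a 4-byte access at
      // residue 4 cannot reach back into [0,4), so the locations cannot
      // alias -- the NoAlias condition from the new code holds.
      uint64_t ModOffset = GEP1BaseOffset & (Modulo - 1);
      assert(ModOffset >= V2Size && V1Size <= Modulo - ModOffset);
      return 0;
    }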
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp deleted file mode 100644 index 4b86d1d..0000000 --- a/lib/Analysis/BlockFrequency.cpp +++ /dev/null @@ -1,59 +0,0 @@ -//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Loops should be simplified before this analysis. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" -#include "llvm/Analysis/BlockFrequency.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" - -using namespace llvm; - -INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis", - true, true) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) -INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis", - true, true) - -char BlockFrequency::ID = 0; - - -BlockFrequency::BlockFrequency() : FunctionPass(ID) { - initializeBlockFrequencyPass(*PassRegistry::getPassRegistry()); - BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); -} - -BlockFrequency::~BlockFrequency() { - delete BFI; -} - -void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BranchProbabilityInfo>(); - AU.setPreservesAll(); -} - -bool BlockFrequency::runOnFunction(Function &F) { - BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); - BFI->doFunction(&F, &BPI); - return false; -} - -/// getblockFreq - Return block frequency. Never return 0, value must be -/// positive. Please note that initial frequency is equal to 1024. It means that -/// we should not rely on the value itself, but only on the comparison to the -/// other block frequencies. We do this to avoid using of floating points. -/// -uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) { - return BFI->getBlockFreq(BB); -} diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp new file mode 100644 index 0000000..d16665f --- /dev/null +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -0,0 +1,63 @@ +//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", + true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", + true, true) + +char BlockFrequencyInfo::ID = 0; + + +BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { + initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); + BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>(); +} + +BlockFrequencyInfo::~BlockFrequencyInfo() { + delete BFI; +} + +void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BranchProbabilityInfo>(); + AU.setPreservesAll(); +} + +bool BlockFrequencyInfo::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); + BFI->doFunction(&F, &BPI); + return false; +} + +void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { + if (BFI) BFI->print(O); +} + +/// getblockFreq - Return block frequency. Return 0 if we don't have the +/// information. Please note that initial frequency is equal to 1024. It means +/// that we should not rely on the value itself, but only on the comparison to +/// the other block frequencies. We do this to avoid using of floating points. +/// +BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const { + return BFI->getBlockFreq(BB); +} diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index e39cd22..bde3b76 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,7 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Support/Debug.h" @@ -33,7 +36,7 @@ namespace { // private methods are hidden in the .cpp file. class BranchProbabilityAnalysis { - typedef std::pair<BasicBlock *, BasicBlock *> Edge; + typedef std::pair<const BasicBlock *, const BasicBlock *> Edge; DenseMap<Edge, uint32_t> *Weights; @@ -52,7 +55,7 @@ class BranchProbabilityAnalysis { // V // BB1<-+ // | | - // | | (Weight = 128) + // | | (Weight = 124) // V | // BB2--+ // | @@ -60,12 +63,21 @@ class BranchProbabilityAnalysis { // V // BB3 // - // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696.. - // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303.. + // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 + // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 - static const uint32_t LBH_TAKEN_WEIGHT = 128; + static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + static const uint32_t RH_TAKEN_WEIGHT = 24; + static const uint32_t RH_NONTAKEN_WEIGHT = 8; + + static const uint32_t PH_TAKEN_WEIGHT = 20; + static const uint32_t PH_NONTAKEN_WEIGHT = 12; + + static const uint32_t ZH_TAKEN_WEIGHT = 20; + static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + // Standard weight value. 
Used when none of the heuristics set weight for // the edge. static const uint32_t NORMAL_WEIGHT = 16; @@ -100,29 +112,6 @@ class BranchProbabilityAnalysis { return false; } - // Multiply Edge Weight by two. - void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { - uint32_t Weight = BP->getEdgeWeight(Src, Dst); - uint32_t MaxWeight = getMaxWeightFor(Src); - - if (Weight * 2 > MaxWeight) - BP->setEdgeWeight(Src, Dst, MaxWeight); - else - BP->setEdgeWeight(Src, Dst, Weight * 2); - } - - // Divide Edge Weight by two. - void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { - uint32_t Weight = BP->getEdgeWeight(Src, Dst); - - assert(Weight > 0); - if (Weight / 2 < MIN_WEIGHT) - BP->setEdgeWeight(Src, Dst, MIN_WEIGHT); - else - BP->setEdgeWeight(Src, Dst, Weight / 2); - } - - uint32_t getMaxWeightFor(BasicBlock *BB) const { return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); } @@ -133,49 +122,119 @@ public: : Weights(W), BP(BP), LI(LI) { } + // Metadata Weights + bool calcMetadataWeights(BasicBlock *BB); + // Return Heuristics - void calcReturnHeuristics(BasicBlock *BB); + bool calcReturnHeuristics(BasicBlock *BB); // Pointer Heuristics - void calcPointerHeuristics(BasicBlock *BB); + bool calcPointerHeuristics(BasicBlock *BB); // Loop Branch Heuristics - void calcLoopBranchHeuristics(BasicBlock *BB); + bool calcLoopBranchHeuristics(BasicBlock *BB); + + // Zero Heurestics + bool calcZeroHeuristics(BasicBlock *BB); bool runOnFunction(Function &F); }; } // end anonymous namespace +// Propagate existing explicit probabilities from either profile data or +// 'expect' intrinsic processing. +bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 1) + return false; + if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) + return false; + + MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof); + if (!WeightsNode) + return false; + + // Ensure there are weights for all of the successors. Note that the first + // operand to the metadata node is a name, not a weight. + if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1) + return false; + + // Build up the final weights that will be used in a temporary buffer, but + // don't add them until all weihts are present. Each weight value is clamped + // to [1, getMaxWeightFor(BB)]. + uint32_t WeightLimit = getMaxWeightFor(BB); + SmallVector<uint32_t, 2> Weights; + Weights.reserve(TI->getNumSuccessors()); + for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { + ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i)); + if (!Weight) + return false; + Weights.push_back( + std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit))); + } + assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + + return true; +} + // Calculate Edge Weights using "Return Heuristics". Predict a successor which // leads directly to Return Instruction will not be taken. 
-void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ +bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ if (BB->getTerminator()->getNumSuccessors() == 1) - return; + return false; + + SmallPtrSet<BasicBlock *, 4> ReturningEdges; + SmallPtrSet<BasicBlock *, 4> StayEdges; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; - if (isReturningBlock(Succ)) { - decEdgeWeight(BB, Succ); + if (isReturningBlock(Succ)) + ReturningEdges.insert(Succ); + else + StayEdges.insert(Succ); + } + + if (uint32_t numStayEdges = StayEdges.size()) { + uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges; + if (stayWeight < NORMAL_WEIGHT) + stayWeight = NORMAL_WEIGHT; + + for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(), + E = StayEdges.end(); I != E; ++I) + BP->setEdgeWeight(BB, *I, stayWeight); + } + + if (uint32_t numRetEdges = ReturningEdges.size()) { + uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges; + if (retWeight < MIN_WEIGHT) + retWeight = MIN_WEIGHT; + for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(), + E = ReturningEdges.end(); I != E; ++I) { + BP->setEdgeWeight(BB, *I, retWeight); } } + + return ReturningEdges.size() > 0; } // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. -void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { +bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isConditional()) - return; + return false; Value *Cond = BI->getCondition(); ICmpInst *CI = dyn_cast<ICmpInst>(Cond); if (!CI || !CI->isEquality()) - return; + return false; Value *LHS = CI->getOperand(0); if (!LHS->getType()->isPointerTy()) - return; + return false; assert(CI->getOperand(1)->getType()->isPointerTy()); @@ -190,29 +249,35 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - incEdgeWeight(BB, Taken); - decEdgeWeight(BB, NonTaken); + BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); + BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. -void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { +bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); Loop *L = LI->getLoopFor(BB); if (!L) - return; + return false; + + SmallPtrSet<BasicBlock *, 8> BackEdges; + SmallPtrSet<BasicBlock *, 8> ExitingEdges; + SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop. 
- SmallVector<BasicBlock *, 8> BackEdges; - SmallVector<BasicBlock *, 8> ExitingEdges; + bool isHeader = BB == L->getHeader(); for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; Loop *SuccL = LI->getLoopFor(Succ); if (SuccL != L) - ExitingEdges.push_back(Succ); + ExitingEdges.insert(Succ); else if (Succ == L->getHeader()) - BackEdges.push_back(Succ); + BackEdges.insert(Succ); + else if (isHeader) + InEdges.insert(Succ); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -220,39 +285,121 @@ void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { if (backWeight < NORMAL_WEIGHT) backWeight = NORMAL_WEIGHT; - for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; BP->setEdgeWeight(BB, Back, backWeight); } } + if (uint32_t numInEdges = InEdges.size()) { + uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; + if (inWeight < NORMAL_WEIGHT) + inWeight = NORMAL_WEIGHT; + + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(), + EE = InEdges.end(); EI != EE; ++EI) { + BasicBlock *Back = *EI; + BP->setEdgeWeight(BB, Back, inWeight); + } + } + uint32_t numExitingEdges = ExitingEdges.size(); if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; - for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { BasicBlock *Exiting = *EI; BP->setEdgeWeight(BB, Exiting, exitWeight); } } + + return true; } +bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return false; + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast<ConstantInt>(RHS); + if (!CV) + return false; + + bool isProb; + if (CV->isZero()) { + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + // X == 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_NE: + // X != 0 -> Likely + isProb = true; + break; + case CmpInst::ICMP_SLT: + // X < 0 -> Unlikely + isProb = false; + break; + case CmpInst::ICMP_SGT: + // X > 0 -> Likely + isProb = true; + break; + default: + return false; + } + } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) { + // InstCombine canonicalizes X <= 0 into X < 1. + // X <= 0 -> Unlikely + isProb = false; + } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) { + // InstCombine canonicalizes X >= 0 into X > -1. + // X >= 0 -> Likely + isProb = true; + } else { + return false; + } + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + if (!isProb) + std::swap(Taken, NonTaken); + + BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); + BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + + return true; +} + + bool BranchProbabilityAnalysis::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { BasicBlock *BB = I++; - // Only LBH uses setEdgeWeight method. 
- calcLoopBranchHeuristics(BB); + if (calcMetadataWeights(BB)) + continue; + + if (calcLoopBranchHeuristics(BB)) + continue; - // PH and RH use only incEdgeWeight and decEwdgeWeight methods to - // not efface LBH results. - calcPointerHeuristics(BB); - calcReturnHeuristics(BB); + if (calcReturnHeuristics(BB)) + continue; + + if (calcPointerHeuristics(BB)) + continue; + + calcZeroHeuristics(BB); } return false; @@ -269,11 +416,11 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { return BPA.runOnFunction(F); } -uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { +uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; + for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + const BasicBlock *Succ = *I; uint32_t Weight = getEdgeWeight(BB, Succ); uint32_t PrevSum = Sum; @@ -284,7 +431,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { return Sum; } -bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const { +bool BranchProbabilityInfo:: +isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% uint32_t Weight = getEdgeWeight(Src, Dst); uint32_t Sum = getSumForBlock(Src); @@ -321,8 +469,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { } // Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. -uint32_t -BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { +uint32_t BranchProbabilityInfo:: +getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { Edge E(Src, Dst); DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); @@ -332,8 +480,8 @@ BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { return DEFAULT_WEIGHT; } -void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, - uint32_t Weight) { +void BranchProbabilityInfo:: +setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { Weights[std::make_pair(Src, Dst)] = Weight; DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " << Dst->getNameStr() << " weight to " << Weight @@ -342,7 +490,7 @@ void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, BranchProbability BranchProbabilityInfo:: -getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const { +getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { uint32_t N = getEdgeWeight(Src, Dst); uint32_t D = getSumForBlock(Src); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ab846a2..e79459d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,7 +6,7 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp - BlockFrequency.cpp + BlockFrequencyInfo.cpp BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp @@ -58,4 +58,10 @@ add_llvm_library(LLVMAnalysis ValueTracking.cpp ) +add_llvm_library_dependencies(LLVMAnalysis + LLVMCore + LLVMSupport + LLVMTarget + ) + add_subdirectory(IPA) diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 7fca17e..df79849 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -43,11 +43,16 @@ using namespace llvm; /// FoldBitCast - Constant fold bitcast, symbolically evaluating it with /// TargetData. 
This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. -static Constant *FoldBitCast(Constant *C, const Type *DestTy, +static Constant *FoldBitCast(Constant *C, Type *DestTy, const TargetData &TD) { - - // This only handles casts to vectors currently. - const VectorType *DestVTy = dyn_cast<VectorType>(DestTy); + // Catch the obvious splat cases. + if (C->isNullValue() && !DestTy->isX86_MMXTy()) + return Constant::getNullValue(DestTy); + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) + return Constant::getAllOnesValue(DestTy); + + // The code below only handles casts to vectors currently. + VectorType *DestVTy = dyn_cast<VectorType>(DestTy); if (DestVTy == 0) return ConstantExpr::getBitCast(C, DestTy); @@ -69,8 +74,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); - const Type *SrcEltTy = CV->getType()->getElementType(); - const Type *DstEltTy = DestVTy->getElementType(); + Type *SrcEltTy = CV->getType()->getElementType(); + Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which // requires endianness information to do the right thing. For example, @@ -85,7 +90,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); - const Type *DestIVTy = + Type *DestIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); @@ -99,7 +104,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // it to integer first. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - const Type *SrcIVTy = + Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); @@ -212,11 +217,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, if (!CI) return false; // Index isn't a simple constant? if (CI->isZero()) continue; // Not adding anything. - if (const StructType *ST = dyn_cast<StructType>(*GTI)) { + if (StructType *ST = dyn_cast<StructType>(*GTI)) { // N = N + Offset Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); } else { - const SequentialType *SQT = cast<SequentialType>(*GTI); + SequentialType *SQT = cast<SequentialType>(*GTI); Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); } } @@ -354,8 +359,8 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, const TargetData &TD) { - const Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); - const IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); + Type *LoadTy = cast<PointerType>(C->getType())->getElementType(); + IntegerType *IntType = dyn_cast<IntegerType>(LoadTy); // If this isn't an integer load we can't fold it directly. if (!IntType) { @@ -363,7 +368,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // and then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. 
- const Type *MapTy; + Type *MapTy; if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) @@ -443,7 +448,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, std::string Str; if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { unsigned StrLen = Str.length(); - const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); + Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); // Replace load with immediate integer if the result is an integer or fp // value. @@ -478,7 +483,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) { if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - const Type *ResTy = cast<PointerType>(C->getType())->getElementType(); + Type *ResTy = cast<PointerType>(C->getType())->getElementType(); if (GV->getInitializer()->isNullValue()) return Constant::getNullValue(ResTy); if (isa<UndefValue>(GV->getInitializer())) @@ -536,19 +541,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// CastGEPIndices - If array indices are not pointer-sized integers, /// explicitly cast them so that they aren't implicitly casted by the /// getelementptr. -static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, - const Type *ResultTy, +static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, + Type *ResultTy, const TargetData *TD) { if (!TD) return 0; - const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); bool Any = false; SmallVector<Constant*, 32> NewIdxs; - for (unsigned i = 1; i != NumOps; ++i) { + for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(), - reinterpret_cast<Value *const *>(Ops+1), - i-1))) && + Ops.slice(1, i-1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -562,7 +566,7 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, if (!Any) return 0; Constant *C = - ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size()); + ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; @@ -571,23 +575,23 @@ static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. 
-static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, - const Type *ResultTy, +static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, + Type *ResultTy, const TargetData *TD) { Constant *Ptr = Ops[0]; if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized()) return 0; - const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' - for (unsigned i = 1; i != NumOps; ++i) + for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa<ConstantInt>(Ops[i])) { // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" - if (NumOps == 2 && + if (Ops.size() == 2 && cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]); assert((CE == 0 || CE->getType() == IntPtrTy) && @@ -606,9 +610,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, } unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); - APInt Offset = APInt(BitWidth, - TD->getIndexedOffset(Ptr->getType(), - (Value**)Ops+1, NumOps-1)); + APInt Offset = + APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), + makeArrayRef((Value **)Ops.data() + 1, + Ops.size() - 1))); Ptr = cast<Constant>(Ptr->stripPointerCasts()); // If this is a GEP of a GEP, fold it all into a single GEP. @@ -627,9 +632,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, Ptr = cast<Constant>(GEP->getOperand(0)); Offset += APInt(BitWidth, - TD->getIndexedOffset(Ptr->getType(), - (Value**)NestedOps.data(), - NestedOps.size())); + TD->getIndexedOffset(Ptr->getType(), NestedOps)); Ptr = cast<Constant>(Ptr->stripPointerCasts()); } @@ -649,10 +652,10 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // we eliminate over-indexing of the notional static type array bounds. // This makes it easy to determine if the getelementptr is "inbounds". // Also, this helps GlobalOpt do SROA on GlobalVariables. - const Type *Ty = Ptr->getType(); + Type *Ty = Ptr->getType(); SmallVector<Constant*, 32> NewIdxs; do { - if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { + if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { if (ATy->isPointerTy()) { // The only pointer indexing we'll do is on the first index of the GEP. if (!NewIdxs.empty()) @@ -665,7 +668,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -679,7 +682,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); } Ty = ATy->getElementType(); - } else if (const StructType *STy = dyn_cast<StructType>(Ty)) { + } else if (StructType *STy = dyn_cast<StructType>(Ty)) { // Determine which field of the struct the offset points into. 
The // getZExtValue is at least as safe as the StructLayout API because we // know the offset is within the struct at this point. @@ -703,7 +706,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // Create a GEP. Constant *C = - ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + ConstantExpr::getGetElementPtr(Ptr, NewIdxs); assert(cast<PointerType>(C->getType())->getElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -778,8 +781,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { cast<Constant>(EVI->getAggregateOperand()), EVI->getIndices()); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Ops.data(), Ops.size(), TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression @@ -800,8 +802,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], TD); - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), - Ops.data(), Ops.size(), TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -814,8 +815,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, /// information, due to only being passed an opcode and operands. Constant /// folding using this function strips this information. /// -Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, - Constant* const* Ops, unsigned NumOps, +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, + ArrayRef<Constant *> Ops, const TargetData *TD) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { @@ -831,9 +832,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ICmp: case Instruction::FCmp: assert(0 && "Invalid for compares"); case Instruction::Call: - if (Function *F = dyn_cast<Function>(Ops[NumOps - 1])) + if (Function *F = dyn_cast<Function>(Ops.back())) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops, NumOps - 1); + return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1)); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing @@ -887,12 +888,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD)) + if (Constant *C = CastGEPIndices(Ops, DestTy, TD)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD)) return C; - return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); + return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); } } @@ -912,7 +913,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. 
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) { if (TD && Ops1->isNullValue()) { - const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the // proper extension or truncation. @@ -934,7 +935,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the @@ -967,7 +968,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, unsigned OpC = Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; Constant *Ops[] = { LHS, RHS }; - return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD); } } @@ -987,7 +988,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, // addressing... gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); for (++I; I != E; ++I) - if (const StructType *STy = dyn_cast<StructType>(*I)) { + if (StructType *STy = dyn_cast<StructType>(*I)) { ConstantInt *CU = cast<ConstantInt>(I.getOperand()); assert(CU->getZExtValue() < STy->getNumElements() && "Struct index out of range!"); @@ -1002,7 +1003,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, return 0; } } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(*I)) { + if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) { if (CI->getZExtValue() >= ATy->getNumElements()) return 0; if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) @@ -1013,7 +1014,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, C = UndefValue::get(ATy->getElementType()); else return 0; - } else if (const VectorType *VTy = dyn_cast<VectorType>(*I)) { + } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) { if (CI->getZExtValue() >= VTy->getNumElements()) return 0; if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) @@ -1101,7 +1102,7 @@ llvm::canConstantFoldCallTo(const Function *F) { } static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - const Type *Ty) { + Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V); if (sys::llvm_fenv_testexcept()) { @@ -1118,7 +1119,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), - double V, double W, const Type *Ty) { + double V, double W, Type *Ty) { sys::llvm_fenv_clearexcept(); V = NativeFP(V, W); if (sys::llvm_fenv_testexcept()) { @@ -1143,7 +1144,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), /// performed, otherwise returns the Constant value resulting from the /// conversion. static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, - const Type *Ty) { + Type *Ty) { assert(Op && "Called with NULL operand"); APFloat Val(Op->getValueAPF()); @@ -1167,13 +1168,12 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
Constant * -llvm::ConstantFoldCall(Function *F, - Constant *const *Operands, unsigned NumOperands) { +llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) { if (!F->hasName()) return 0; StringRef Name = F->getName(); - const Type *Ty = F->getReturnType(); - if (NumOperands == 1) { + Type *Ty = F->getReturnType(); + if (Operands.size() == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); @@ -1327,7 +1327,7 @@ llvm::ConstantFoldCall(Function *F, return 0; } - if (NumOperands == 2) { + if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) { if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index ac5eeeb..bfa429d 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -29,14 +29,74 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { } DIBuilder::DIBuilder(Module &m) - : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} + : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0), + TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0), + ValueFn(0) +{} + +/// finalize - Construct any deferred debug info descriptors. +void DIBuilder::finalize() { + DIArray Enums = getOrCreateArray(AllEnumTypes); + DIType(TempEnumTypes).replaceAllUsesWith(Enums); + + DIArray RetainTypes = getOrCreateArray(AllRetainTypes); + DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); + + DIArray SPs = getOrCreateArray(AllSubprograms); + DIType(TempSubprograms).replaceAllUsesWith(SPs); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) { + SmallVector<Value *, 4> Variables; + for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii) + Variables.push_back(NMD->getOperand(ii)); + if (MDNode *Temp = SP.getVariablesNodes()) { + DIArray AV = getOrCreateArray(Variables); + DIType(Temp).replaceAllUsesWith(AV); + } + NMD->eraseFromParent(); + } + } + + DIArray GVs = getOrCreateArray(AllGVs); + DIType(TempGVs).replaceAllUsesWith(GVs); +} + +/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return +/// N. +static MDNode *getNonCompileUnitScope(MDNode *N) { + if (DIDescriptor(N).isCompileUnit()) + return NULL; + return N; +} /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. 
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, StringRef Producer, - bool isOptimized, StringRef Flags, +void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, unsigned RunTimeVer) { + assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89 + && "Invalid Language tag"); + assert (!Filename.empty() + && "Unable to create compile unit without filename"); + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + TempEnumTypes = MDNode::getTemporary(VMContext, TElts); + Value *THElts[] = { TempEnumTypes }; + MDNode *EnumHolder = MDNode::get(VMContext, THElts); + + TempRetainTypes = MDNode::getTemporary(VMContext, TElts); + Value *TRElts[] = { TempRetainTypes }; + MDNode *RetainHolder = MDNode::get(VMContext, TRElts); + + TempSubprograms = MDNode::getTemporary(VMContext, TElts); + Value *TSElts[] = { TempSubprograms }; + MDNode *SPHolder = MDNode::get(VMContext, TSElts); + + TempGVs = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { TempGVs }; + MDNode *GVHolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -48,7 +108,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), + EnumHolder, + RetainHolder, + SPHolder, + GVHolder }; TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); @@ -61,17 +125,19 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); + assert(!Filename.empty() && "Unable to create file without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), MDString::get(VMContext, Filename), MDString::get(VMContext, Directory), - TheCU + NULL // TheCU }; return DIFile(MDNode::get(VMContext, Elts)); } /// createEnumerator - Create a single enumerator value. DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { + assert(!Name.empty() && "Unable to create enumerator without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -80,16 +146,37 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { return DIEnumerator(MDNode::get(VMContext, Elts)); } -/// createBasicType - Create debugging information entry for a basic +/// createNullPtrType - Create C++0x nullptr type. +DIType DIBuilder::createNullPtrType(StringRef Name) { + assert(!Name.empty() && "Unable to create type without name"); + // nullptr is encoded in DIBasicType format. Line number, filename, + // ,size, alignment, offset and flags are always empty here. 
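// --- [Editor's sketch; not part of the patch] -------------------------------
// createCompileUnit now seeds four temporary nodes (enum types, retained
// types, subprograms, globals) that finalize() later replaces with the
// accumulated arrays, so a producer must end debug-info emission with
// DIBuilder::finalize(). A minimal, hypothetical driver against this
// revision's API (llvm/Analysis/DIBuilder.h):
void emitDebugInfo(llvm::Module &M) {
  llvm::DIBuilder DIB(M);
  DIB.createCompileUnit(llvm::dwarf::DW_LANG_C99, "a.c", "/tmp",
                        "my-frontend", /*isOptimized=*/false,
                        /*Flags=*/"", /*RunTimeVer=*/0);
  // ... createFile / createBasicType / createFunction calls go here ...
  DIB.finalize(); // resolves TempEnumTypes, TempRetainTypes, etc.
}
// ----------------------------------------------------------------------------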
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), + NULL, //TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Encoding + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createBasicType - Create debugging information entry for a basic /// type, e.g 'char'. -DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding) { + assert(!Name.empty() && "Unable to create type without name"); // Basic types are encoded in DIBasicType format. Line number, filename, // offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, Name), NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -108,7 +195,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), - TheCU, + NULL, //TheCU, MDString::get(VMContext, StringRef()), // Empty name. NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -127,7 +214,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, Name), NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -142,10 +229,11 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, /// createReferenceType - Create debugging information entry for a reference. DIType DIBuilder::createReferenceType(DIType RTy) { + assert(RTy.Verify() && "Unable to create reference type"); // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), - TheCU, + NULL, // TheCU, NULL, // Name NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line @@ -165,7 +253,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), @@ -199,9 +287,10 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { } /// createInheritance - Create debugging information entry to establish -/// inheritnace relationship between two types. -DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, +/// inheritance relationship between two types. +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { + assert(Ty.Verify() && "Unable to create inheritance"); // TAG_inheritance is encoded in DIDerivedType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), @@ -219,15 +308,15 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, } /// createMemberType - Create debugging information entry for a member. -DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, + uint64_t OffsetInBits, unsigned Flags, DIType Ty) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -242,17 +331,17 @@ DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. -DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, + uint64_t OffsetInBits, unsigned Flags, DIType Ty, StringRef PropertyName, StringRef GetterName, StringRef SetterName, unsigned PropertyAttributes) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), - File, // Or TheCU ? Ty ? + getNonCompileUnitScope(File), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -270,8 +359,8 @@ DIType DIBuilder::createObjCIVar(StringRef Name, } /// createClassType - Create debugging information entry for a class. -DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, @@ -279,7 +368,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -298,13 +387,13 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, /// createTemplateTypeParameter - Create debugging information for template /// type parameter. -DITemplateTypeParameter +DITemplateTypeParameter DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, DIType Ty, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), Ty, File, @@ -316,14 +405,14 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, /// createTemplateValueParameter - Create debugging information for template /// value parameter. 
-DITemplateValueParameter +DITemplateValueParameter DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, DIType Ty, uint64_t Val, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), Ty, ConstantInt::get(Type::getInt64Ty(VMContext), Val), @@ -335,15 +424,15 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, } /// createStructType - Create debugging information entry for a struct. -DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, - unsigned Flags, DIArray Elements, + unsigned Flags, DIArray Elements, unsigned RunTimeLang) { // TAG_structure_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -360,7 +449,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, } /// createUnionType - Create debugging information entry for an union. -DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, +DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, @@ -368,7 +457,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -389,9 +478,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), - File, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), MDString::get(VMContext, ""), - File, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -405,16 +494,17 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { return DIType(MDNode::get(VMContext, Elts)); } -/// createEnumerationType - Create debugging information entry for an +/// createEnumerationType - Create debugging information entry for an /// enumeration. -DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, DIArray Elements) { +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + DIArray Elements) { // TAG_enumeration_type is encoded in DICompositeType format. 
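// --- [Editor's sketch; not part of the patch] -------------------------------
// With contexts filtered through getNonCompileUnitScope(), passing the
// compile unit as a scope now stores NULL and the node is anchored by the
// compile unit's arrays instead. A hypothetical two-field struct built with
// this revision's createMemberType/createStructType signatures:
llvm::DIType makePointType(llvm::DIBuilder &DIB, llvm::DIFile File,
                           llvm::DIType IntTy) {
  llvm::DIType X = DIB.createMemberType(File, "x", File, /*LineNumber=*/1,
                                        /*SizeInBits=*/32, /*AlignInBits=*/32,
                                        /*OffsetInBits=*/0, /*Flags=*/0, IntTy);
  llvm::DIType Y = DIB.createMemberType(File, "y", File, 1, 32, 32, 32, 0,
                                        IntTy);
  llvm::Value *Members[] = { X, Y };
  return DIB.createStructType(File, "Point", File, /*LineNumber=*/1,
                              /*SizeInBits=*/64, /*AlignInBits=*/32,
                              /*Flags=*/0, DIB.getOrCreateArray(Members),
                              /*RunTimeLang=*/0);
}
// ----------------------------------------------------------------------------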
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), @@ -428,20 +518,19 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; MDNode *Node = MDNode::get(VMContext, Elts); - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); - NMD->addOperand(Node); + AllEnumTypes.push_back(Node); return DIType(Node); } /// createArrayType - Create debugging information entry for an array. -DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, ""), - TheCU, + NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -456,14 +545,14 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, } /// createVectorType - Create debugging information entry for a vector. -DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, +DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_vector_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_vector_type), - TheCU, + NULL, //TheCU, MDString::get(VMContext, ""), - TheCU, + NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -501,18 +590,17 @@ DIType DIBuilder::createArtificialType(DIType Ty) { return DIType(MDNode::get(VMContext, Elts)); } -/// retainType - Retain DIType in a module even if it is not referenced +/// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. void DIBuilder::retainType(DIType T) { - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); - NMD->addOperand(T); + AllRetainTypes.push_back(T); } /// createUnspecifiedParameter - Create unspeicified type descriptor /// for the subroutine type. DIDescriptor DIBuilder::createUnspecifiedParameter() { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; return DIDescriptor(MDNode::get(VMContext, Elts)); } @@ -532,7 +620,7 @@ DIType DIBuilder::createTemporaryType(DIFile F) { // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type), - F.getCompileUnit(), + TheCU, NULL, F }; @@ -563,12 +651,12 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { /// createGlobalVariable - Create a new descriptor for the specified global. 
DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - TheCU, + NULL, // TheCU, MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, Name), @@ -580,22 +668,20 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, Val }; MDNode *Node = MDNode::get(VMContext, Elts); - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); + AllGVs.push_back(Node); return DIGlobalVariable(Node); } /// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: -createStaticVariable(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, unsigned LineNumber, +createStaticVariable(DIDescriptor Context, StringRef Name, + StringRef LinkageName, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -607,26 +693,25 @@ createStaticVariable(DIDescriptor Context, StringRef Name, Val }; MDNode *Node = MDNode::get(VMContext, Elts); - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); + AllGVs.push_back(Node); return DIGlobalVariable(Node); } /// createVariable - Create a new descriptor for the specified variable. DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNo, DIType Ty, + unsigned LineNo, DIType Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { Value *Elts[] = { GetTagConstant(VMContext, Tag), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))), Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags) + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Constant::getNullValue(Type::getInt32Ty(VMContext)), }; MDNode *Node = MDNode::get(VMContext, Elts); if (AlwaysPreserve) { @@ -634,13 +719,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, // to preserve variable info in such situation then stash it in a // named mdnode. 
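// --- [Editor's sketch; not part of the patch] -------------------------------
// Globals and statics now queue in AllGVs instead of the "llvm.dbg.gv" named
// node and reach the compile unit in finalize(). A hypothetical caller:
void describeGlobal(llvm::DIBuilder &DIB, llvm::DIFile File, llvm::DIType Ty,
                    llvm::GlobalVariable *GV) {
  DIB.createGlobalVariable(GV->getName(), File, /*LineNumber=*/1, Ty,
                           /*isLocalToUnit=*/false, /*Val=*/GV);
}
// ----------------------------------------------------------------------------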
DISubprogram Fn(getDISubprogram(Scope)); - StringRef FName = "fn"; - if (Fn.getFunction()) - FName = Fn.getFunction()->getName(); - char One = '\1'; - if (FName.startswith(StringRef(&One, 1))) - FName = FName.substr(1); - NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName); + NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn); FnLocals->addOperand(Node); } return DIVariable(Node); @@ -655,12 +734,14 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, unsigned ArgNo) { SmallVector<Value *, 15> Elts; Elts.push_back(GetTagConstant(VMContext, Tag)); - Elts.push_back(Scope); + Elts.push_back(getNonCompileUnitScope(Scope)), Elts.push_back(MDString::get(VMContext, Name)); Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), + (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); Elts.append(Addr.begin(), Addr.end()); return DIVariable(MDNode::get(VMContext, Elts)); @@ -677,10 +758,15 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, Function *Fn, MDNode *TParams, MDNode *Decl) { + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Temp = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { Temp }; + MDNode *THolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -696,13 +782,13 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParams, - Decl + Decl, + THolder }; MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); + AllSubprograms.push_back(Node); return DISubprogram(Node); } @@ -720,10 +806,15 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, bool isOptimized, Function *Fn, MDNode *TParam) { + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Temp = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { Temp }; + MDNode *THolder = MDNode::get(VMContext, TVElts); + Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, + getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -739,12 +830,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, TParam, + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + THolder }; MDNode *Node = MDNode::get(VMContext, Elts); - - // Create a named metadata so that we do not lose this mdnode. 
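// --- [Editor's sketch; not part of the patch] -------------------------------
// createFunction likewise stops writing to "llvm.dbg.sp"; the subprogram is
// appended to AllSubprograms and anchored by finalize(). Assuming this
// revision's parameter order (trailing TParams/Decl defaulted), a
// hypothetical caller:
llvm::DISubprogram emitFunction(llvm::DIBuilder &DIB, llvm::DIFile File,
                                llvm::DIType SubroutineTy, llvm::Function *F) {
  return DIB.createFunction(File, F->getName(), F->getName(), File,
                            /*LineNo=*/1, SubroutineTy,
                            /*isLocalToUnit=*/false, /*isDefinition=*/true,
                            /*Flags=*/0, /*isOptimized=*/false, F);
}
// ----------------------------------------------------------------------------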
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); return DISubprogram(Node); } @@ -754,7 +843,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), - Scope, + getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) @@ -762,13 +851,25 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, return DINameSpace(MDNode::get(VMContext, Elts)); } +/// createLexicalBlockFile - This creates a new MDNode that encapsulates +/// an existing scope with a new filename. +DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, + DIFile File) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + Scope, + File + }; + return DILexicalBlockFile(MDNode::get(VMContext, Elts)); +} + DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { // Defeat MDNode uniqing for lexical blocks by using unique id. static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), - Scope, + getNonCompileUnitScope(Scope), ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt32Ty(VMContext), Col), File, @@ -836,4 +937,3 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, VarInfo }; return CallInst::Create(ValueFn, Args, "", InsertAtEnd); } - diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index b23c351..cd832ab 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -171,7 +171,7 @@ static bool getLocationInfo(const Value *V, std::string &DisplayName, void PrintDbgInfo::printVariableDeclaration(const Value *V) { std::string DisplayName, File, Directory, Type; - unsigned LineNo; + unsigned LineNo = 0; if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) return; diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index b42e946..44457d3 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -39,6 +39,9 @@ DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { } +DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) { +} + DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { } @@ -111,9 +114,17 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const { unsigned DIVariable::getNumAddrElements() const { if (getVersion() <= llvm::LLVMDebugVersion8) return DbgNode->getNumOperands()-6; - return DbgNode->getNumOperands()-7; + if (getVersion() == llvm::LLVMDebugVersion9) + return DbgNode->getNumOperands()-7; + return DbgNode->getNumOperands()-8; } +/// getInlinedAt - If this variable is inlined then return inline location. +MDNode *DIVariable::getInlinedAt() const { + if (getVersion() <= llvm::LLVMDebugVersion9) + return NULL; + return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)); +} //===----------------------------------------------------------------------===// // Predicates @@ -122,7 +133,14 @@ unsigned DIVariable::getNumAddrElements() const { /// isBasicType - Return true if the specified tag is legal for /// DIBasicType. 
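// --- [Editor's sketch; not part of the patch] -------------------------------
// createLexicalBlockFile wraps an existing scope with a different file -- the
// shape produced by #include inside a function body; only Scope and File are
// stored, so the scope chain is unchanged. Hypothetical use when the front
// end switches presumed files:
llvm::DILexicalBlockFile
enterIncludedFile(llvm::DIBuilder &DIB, llvm::DIDescriptor CurScope,
                  llvm::DIFile IncludedFile) {
  return DIB.createLexicalBlockFile(CurScope, IncludedFile);
}
// ----------------------------------------------------------------------------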
bool DIDescriptor::isBasicType() const { - return DbgNode && getTag() == dwarf::DW_TAG_base_type; + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_unspecified_type: + return true; + default: + return false; + } } /// isDerivedType - Return true if the specified tag is legal for DIDerivedType. @@ -248,9 +266,17 @@ bool DIDescriptor::isNameSpace() const { return DbgNode && getTag() == dwarf::DW_TAG_namespace; } +/// isLexicalBlockFile - Return true if the specified descriptor is a +/// lexical block with an extra file. +bool DIDescriptor::isLexicalBlockFile() const { + return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && + (DbgNode->getNumOperands() == 3); +} + /// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. bool DIDescriptor::isLexicalBlock() const { - return DbgNode && getTag() == dwarf::DW_TAG_lexical_block; + return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && + (DbgNode->getNumOperands() > 3); } /// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. @@ -320,6 +346,22 @@ void DIType::replaceAllUsesWith(MDNode *D) { } } +/// isUnsignedDIType - Return true if type encoding is unsigned. +bool DIType::isUnsignedDIType() { + DIDerivedType DTy(DbgNode); + if (DTy.Verify()) + return DTy.getTypeDerivedFrom().isUnsignedDIType(); + + DIBasicType BTy(DbgNode); + if (BTy.Verify()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char) + return true; + } + return false; +} + /// Verify - Verify that a compile unit is well formed. bool DICompileUnit::Verify() const { if (!DbgNode) @@ -335,7 +377,7 @@ bool DICompileUnit::Verify() const { bool DIType::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) + if (getContext() && !getContext().Verify()) return false; unsigned Tag = getTag(); if (!isBasicType() && Tag != dwarf::DW_TAG_const_type && @@ -343,6 +385,7 @@ bool DIType::Verify() const { Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type && Tag != dwarf::DW_TAG_enumeration_type + && Tag != dwarf::DW_TAG_subroutine_type && getFilename().empty()) return false; return true; @@ -362,12 +405,9 @@ bool DIDerivedType::Verify() const { bool DICompositeType::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) + if (getContext() && !getContext().Verify()) return false; - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) - return false; return true; } @@ -376,11 +416,7 @@ bool DISubprogram::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) - return false; - - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (getContext() && !getContext().Verify()) return false; DICompositeType Ty = getType(); @@ -397,11 +433,7 @@ bool DIGlobalVariable::Verify() const { if (getDisplayName().empty()) return false; - if (!getContext().Verify()) - return false; - - DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (getContext() && !getContext().Verify()) return false; DIType Ty = getType(); @@ -419,10 +451,7 @@ bool DIVariable::Verify() const { if (!DbgNode) return false; - if (!getContext().Verify()) - return false; - - if (!getCompileUnit().Verify()) + if (getContext() && !getContext().Verify()) return false; DIType Ty = getType(); @@ -446,8 +475,6 @@ bool DINameSpace::Verify() const { return false; if (getName().empty()) return false; - if 
(!getCompileUnit().Verify()) - return false; return true; } @@ -504,9 +531,28 @@ unsigned DISubprogram::isOptimized() const { return 0; } +MDNode *DISubprogram::getVariablesNodes() const { + if (!DbgNode || DbgNode->getNumOperands() <= 19) + return NULL; + if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + return dyn_cast_or_null<MDNode>(Temp->getOperand(0)); + return NULL; +} + +DIArray DISubprogram::getVariables() const { + if (!DbgNode || DbgNode->getNumOperands() <= 19) + return DIArray(); + if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0))) + return DIArray(A); + return DIArray(); +} + StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); + if (isLexicalBlockFile()) + return DILexicalBlockFile(DbgNode).getFilename(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getFilename(); if (isSubprogram()) @@ -526,6 +572,8 @@ StringRef DIScope::getFilename() const { StringRef DIScope::getDirectory() const { if (!DbgNode) return StringRef(); + if (isLexicalBlockFile()) + return DILexicalBlockFile(DbgNode).getDirectory(); if (isLexicalBlock()) return DILexicalBlock(DbgNode).getDirectory(); if (isSubprogram()) @@ -542,6 +590,47 @@ StringRef DIScope::getDirectory() const { return StringRef(); } +DIArray DICompileUnit::getEnumTypes() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + +DIArray DICompileUnit::getRetainedTypes() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + +DIArray DICompileUnit::getSubprograms() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + + +DIArray DICompileUnit::getGlobalVariables() const { + if (!DbgNode || DbgNode->getNumOperands() < 14) + return DIArray(); + + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13))) + if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0))) + return DIArray(A); + return DIArray(); +} + //===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. 
//===----------------------------------------------------------------------===// @@ -573,7 +662,6 @@ void DIType::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << "line " << getLineNumber() << ", " << getSizeInBits() << " bits, " @@ -629,7 +717,6 @@ void DISubprogram::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) @@ -652,7 +739,6 @@ void DIGlobalVariable::print(raw_ostream &OS) const { OS << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; if (isLocalToUnit()) @@ -666,13 +752,48 @@ void DIGlobalVariable::print(raw_ostream &OS) const { OS << "]\n"; } +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << "<unknown>"; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; + } + } +} + +void DIVariable::printExtendedName(raw_ostream &OS) const { + const LLVMContext &Ctx = DbgNode->getContext(); + StringRef Res = getName(); + if (!Res.empty()) + OS << Res << "," << getLineNumber(); + if (MDNode *InlinedAt = getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + /// print - Print variable. void DIVariable::print(raw_ostream &OS) const { StringRef Res = getName(); if (!Res.empty()) OS << " [" << Res << "] "; - getCompileUnit().print(OS); OS << " [" << getLineNumber() << "] "; getType().print(OS); OS << "\n"; @@ -744,22 +865,61 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) { /// getFnSpecificMDNode - Return a NameMDNode, if available, that is /// suitable to hold function specific information. -NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) { +NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) { SmallString<32> Name = StringRef("llvm.dbg.lv."); - fixupObjcLikeName(FuncName, Name); - + StringRef FName = "fn"; + if (Fn.getFunction()) + FName = Fn.getFunction()->getName(); + else + FName = Fn.getName(); + char One = '\1'; + if (FName.startswith(StringRef(&One, 1))) + FName = FName.substr(1); + fixupObjcLikeName(FName, Name); return M.getNamedMetadata(Name.str()); } /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable /// to hold function specific information. 
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
   SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  fixupObjcLikeName(FuncName, Name);
-
+  StringRef FName = "fn";
+  if (Fn.getFunction())
+    FName = Fn.getFunction()->getName();
+  else
+    FName = Fn.getName();
+  char One = '\1';
+  if (FName.startswith(StringRef(&One, 1)))
+    FName = FName.substr(1);
+  fixupObjcLikeName(FName, Name);
+
   return M.getOrInsertNamedMetadata(Name.str());
 }
 
+/// createInlinedVariable - Create a new inlined variable based on current
+/// variable.
+/// @param DV Current Variable.
+/// @param InlinedScope Location at current variable is inlined.
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+                                       LLVMContext &VMContext) {
+  SmallVector<Value *, 16> Elts;
+  // Insert inlined scope as 7th element.
+  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+    i == 7 ? Elts.push_back(InlinedScope) :
+             Elts.push_back(DV->getOperand(i));
+  return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// cleanseInlinedVariable - Remove inlined scope from the variable.
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+  SmallVector<Value *, 16> Elts;
+  // Insert inlined scope as 7th element.
+  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+    i == 7 ?
+      Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))):
+      Elts.push_back(DV->getOperand(i));
+  return DIVariable(MDNode::get(VMContext, Elts));
+}
 
 //===----------------------------------------------------------------------===//
 // DebugInfoFinder implementations.
@@ -767,6 +927,10 @@ NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
 
 /// processModule - Process entire module and collect debug info.
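// --- [Editor's sketch; not part of the patch] -------------------------------
// How a client would clone variable metadata across an inlining boundary with
// the new helpers (llvm/Analysis/DebugInfo.h): operand 7 carries the
// inlined-at scope, createInlinedVariable fills it in, cleanseInlinedVariable
// resets it to a null placeholder. The wrapper name is hypothetical.
llvm::DIVariable remapForInlining(llvm::MDNode *OrigVar,
                                  llvm::MDNode *InlinedScope,
                                  llvm::LLVMContext &Ctx) {
  return llvm::createInlinedVariable(OrigVar, InlinedScope, Ctx);
}
// ----------------------------------------------------------------------------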
void DebugInfoFinder::processModule(Module &M) { + if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) + addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i))); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; @@ -785,6 +949,10 @@ void DebugInfoFinder::processModule(Module &M) { addCompileUnit(DICompileUnit(Scope)); else if (Scope.isSubprogram()) processSubprogram(DISubprogram(Scope)); + else if (Scope.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(Scope); + processLexicalBlock(DILexicalBlock(DBF.getScope())); + } else if (Scope.isLexicalBlock()) processLexicalBlock(DILexicalBlock(Scope)); @@ -796,7 +964,8 @@ void DebugInfoFinder::processModule(Module &M) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); if (addGlobalVariable(DIG)) { - addCompileUnit(DIG.getCompileUnit()); + if (DIG.getVersion() <= LLVMDebugVersion10) + addCompileUnit(DIG.getCompileUnit()); processType(DIG.getType()); } } @@ -817,6 +986,10 @@ void DebugInfoFinder::processLocation(DILocation Loc) { processSubprogram(DISubprogram(S)); else if (S.isLexicalBlock()) processLexicalBlock(DILexicalBlock(S)); + else if (S.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(S); + processLexicalBlock(DILexicalBlock(DBF.getScope())); + } processLocation(Loc.getOrigLocation()); } @@ -824,8 +997,8 @@ void DebugInfoFinder::processLocation(DILocation Loc) { void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) return; - - addCompileUnit(DT.getCompileUnit()); + if (DT.getVersion() <= LLVMDebugVersion10) + addCompileUnit(DT.getCompileUnit()); if (DT.isCompositeType()) { DICompositeType DCT(DT); processType(DCT.getTypeDerivedFrom()); @@ -848,6 +1021,10 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { DIScope Context = LB.getContext(); if (Context.isLexicalBlock()) return processLexicalBlock(DILexicalBlock(Context)); + else if (Context.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(Context); + return processLexicalBlock(DILexicalBlock(DBF.getScope())); + } else return processSubprogram(DISubprogram(Context)); } @@ -856,7 +1033,8 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (!addSubprogram(SP)) return; - addCompileUnit(SP.getCompileUnit()); + if (SP.getVersion() <= LLVMDebugVersion10) + addCompileUnit(SP.getCompileUnit()); processType(SP.getType()); } @@ -871,8 +1049,8 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { if (!NodesSeen.insert(DV)) return; - - addCompileUnit(DIVariable(N).getCompileUnit()); + if (DIVariable(N).getVersion() <= LLVMDebugVersion10) + addCompileUnit(DIVariable(N).getCompileUnit()); processType(DIVariable(N).getType()); } @@ -930,6 +1108,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) { if (D.isSubprogram()) return DISubprogram(Scope); + if (D.isLexicalBlockFile()) + return getDISubprogram(DILexicalBlockFile(Scope).getContext()); + if (D.isLexicalBlock()) return getDISubprogram(DILexicalBlock(Scope).getContext()); @@ -946,3 +1127,17 @@ DICompositeType llvm::getDICompositeType(DIType T) { return DICompositeType(); } + +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context 
nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) {
+  if (!Context)
+    return false;
+  DIDescriptor D(Context);
+  if (D.isSubprogram())
+    return true;
+  if (D.isType())
+    return isSubprogramContext(DIType(Context).getContext());
+  return false;
+}
+
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 8ffef29..eae83fd 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,3 +5,9 @@ add_llvm_library(LLVMipa
   GlobalsModRef.cpp
   IPA.cpp
   )
+
+add_llvm_library_dependencies(LLVMipa
+  LLVMAnalysis
+  LLVMCore
+  LLVMSupport
+  )
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 659ffab..963da75 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -44,8 +44,8 @@ namespace {
 class CGPassManager : public ModulePass, public PMDataManager {
 public:
   static char ID;
-  explicit CGPassManager(int Depth)
-    : ModulePass(ID), PMDataManager(Depth) { }
+  explicit CGPassManager()
+    : ModulePass(ID), PMDataManager() { }
 
   /// run - Execute all of the passes scheduled for execution.  Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
@@ -350,6 +350,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
           dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n";
         }
        );
+  (void)MadeChange;
 
   return DevirtualizedCall;
 }
@@ -542,7 +543,7 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
     PMDataManager *PMD = PMS.top();
 
     // [1] Create new Call Graph Pass Manager
-    CGP = new CGPassManager(PMD->getDepth() + 1);
+    CGP = new CGPassManager();
 
     // [2] Set up new manager's top level manager
     PMTopLevelManager *TPM = PMD->getTopLevelManager();
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 6535786..e9df3ca 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -29,7 +29,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types",
 // IncorporateType - Incorporate one type and all of its subtypes into the
 // collection of used types.
 //
-void FindUsedTypes::IncorporateType(const Type *Ty) {
+void FindUsedTypes::IncorporateType(Type *Ty) {
   // If ty doesn't already exist in the used types map, add it now, otherwise
   // return.
   if (!UsedTypes.insert(Ty)) return;  // Already contain Ty.
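// --- [Editor's sketch; not part of the patch] -------------------------------
// Dropping const from IncorporateType ripples into clients, which now walk a
// SetVector<Type *>. A hypothetical consumer of the pass result
// (llvm/Analysis/FindUsedTypes.h), mirroring the print routine below:
void listUsedTypes(const llvm::FindUsedTypes &FUT, llvm::raw_ostream &OS) {
  const llvm::SetVector<llvm::Type *> &Tys = FUT.getTypes();
  for (llvm::SetVector<llvm::Type *>::const_iterator I = Tys.begin(),
         E = Tys.end(); I != E; ++I)
    OS << **I << '\n';
}
// ----------------------------------------------------------------------------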
@@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) { // void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { OS << "Types in use by this module:\n"; - for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(), + for (SetVector<Type *>::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { OS << " " << **I << '\n'; } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index e5f0a77..d0ca892 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -146,7 +146,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { ISE, User, I, NewUse.PostIncLoops, *SE, *DT); - DEBUG(dbgs() << " NORMALIZED TO: " << *ISE << '\n'); + DEBUG(if (SE->getSCEV(I) != ISE) + dbgs() << " NORMALIZED TO: " << *ISE << '\n'); } } return true; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index efde598..e12e322 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -24,13 +25,13 @@ using namespace llvm; /// TODO: Perhaps calls like memcpy, strcpy, etc? bool llvm::callIsSmall(const Function *F) { if (!F) return false; - + if (F->hasLocalLinkage()) return false; - + if (!F->hasName()) return false; - + StringRef Name = F->getName(); - + // These will all likely lower to a single selection DAG node. if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || Name == "fabs" || Name == "fabsf" || Name == "fabsl" || @@ -38,7 +39,7 @@ bool llvm::callIsSmall(const Function *F) { Name == "cos" || Name == "cosf" || Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) return true; - + // These are all likely to be optimized into something smaller. if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || Name == "exp2l" || Name == "exp2f" || @@ -46,13 +47,14 @@ bool llvm::callIsSmall(const Function *F) { Name == "round" || Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") return true; - + return false; } /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetData *TD) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); @@ -67,8 +69,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ImmutableCallSite CS(cast<Instruction>(II)); if (const Function *F = CS.getCalledFunction()) { - // If a function is both internal and has a single use, then it is - // extremely likely to get inlined in the future (it was probably + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). 
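// --- [Editor's sketch; not part of the patch] -------------------------------
// callIsSmall (above) whitelists libm-style calls that lower to one or two
// selection-DAG nodes. A hypothetical cost loop would skip the usual penalty
// for them:
static unsigned callPenalty(llvm::ImmutableCallSite CS) {
  const llvm::Function *Callee = CS.getCalledFunction();
  if (Callee && llvm::callIsSmall(Callee))
    return 0; // copysign, fabs, sqrt, ... are treated as nearly free
  return llvm::InlineConstants::CallPenalty;
}
// ----------------------------------------------------------------------------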
if (F->hasInternalLinkage() && F->hasOneUse()) ++NumInlineCandidates; @@ -91,20 +93,25 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ++NumCalls; } } - + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (!AI->isStaticAlloca()) this->usesDynamicAlloca = true; } if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) - ++NumVectorInsts; - + ++NumVectorInsts; + if (const CastInst *CI = dyn_cast<CastInst>(II)) { // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI)) continue; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (isa<TruncInst>(CI) && TD && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + continue; // Result of a cmp instruction is often extended (to be used by other // cmp instructions, logical or return instructions). These are usually // nop on most sane targets. @@ -119,10 +126,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ++NumInsts; } - + if (isa<ReturnInst>(BB->getTerminator())) ++NumRets; - + // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this indirect @@ -217,7 +224,7 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. -void CodeMetrics::analyzeFunction(Function *F) { +void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { // If this function contains a call to setjmp or _setjmp, never inline // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they @@ -227,13 +234,14 @@ void CodeMetrics::analyzeFunction(Function *F) { // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB); + analyzeBasicBlock(&*BB, TD); } /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. -void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { - Metrics.analyzeFunction(F); +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, + const TargetData *TD) { + Metrics.analyzeFunction(F, TD); // A function with exactly one return has it removed during the inlining // process (see InlineFunction), so don't count it. @@ -252,7 +260,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { /// NeverInline - returns true if the function should never be inlined into /// any caller bool InlineCostAnalyzer::FunctionInfo::NeverInline() { - return (Metrics.callsSetJmp || Metrics.isRecursive || + return (Metrics.callsSetJmp || Metrics.isRecursive || Metrics.containsIndirectBr); } // getSpecializationBonus - The heuristic used to determine the per-call @@ -263,19 +271,19 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, { if (Callee->mayBeOverridden()) return 0; - + int Bonus = 0; // If this function uses the coldcc calling convention, prefer not to // specialize it. if (Callee->getCallingConv() == CallingConv::Cold) Bonus -= InlineConstants::ColdccPenalty; - + // Get information about the callee. 
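// --- [Editor's sketch; not part of the patch] -------------------------------
// The new TargetData-aware rule above treats a trunc to a target-legal width
// as free, since the target can compare and shift at that width directly.
// The same test, standalone (TD may be null when no target data is known):
static bool isFreeTrunc(const llvm::CastInst *CI, const llvm::TargetData *TD) {
  return llvm::isa<llvm::TruncInst>(CI) && TD &&
         TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()));
}
// ----------------------------------------------------------------------------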
FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); unsigned ArgNo = 0; unsigned i = 0; @@ -286,7 +294,7 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, Bonus += CountBonusForConstant(I); } - // Calls usually take a long time, so they make the specialization gain + // Calls usually take a long time, so they make the specialization gain // smaller. Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; @@ -300,13 +308,13 @@ int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, // inlining because we decide we don't want to give a bonus for // devirtualizing. int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) { - + // This could just be NULL. if (!C) return 0; - + Function *F = dyn_cast<Function>(C); if (!F) return 0; - + int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F); return (Bonus > 0) ? 0 : Bonus; } @@ -355,18 +363,18 @@ int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) { Bonus += CountBonusForConstant(&Inst); } } - + return Bonus; } int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); - + CalleeFI->analyzeFunction(Callee, TD); + // InlineCost - This value measures how good of an inline candidate this call // site is to inline. A lower inline cost make is more likely for the call to // be inlined. This value may go negative. @@ -392,9 +400,9 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // weights calculated for the callee to determine how much will be folded // away with this information. else if (isa<Constant>(I)) - InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; } - + // Each argument passed in has a cost at both the caller and the callee // sides. Measurements show that each argument costs about the same as an // instruction. @@ -408,28 +416,28 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { // Look at the size of the callee. Each instruction counts as 5. InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; - + return InlineCost; } int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); - + CalleeFI->analyzeFunction(Callee, TD); + bool isDirectCall = CS.getCalledFunction() == Callee; Instruction *TheCall = CS.getInstruction(); int Bonus = 0; - + // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) Bonus += InlineConstants::LastCallToStaticBonus; - + // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this. 
@@ -438,12 +446,12 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { Bonus += InlineConstants::NoreturnPenalty; } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) Bonus += InlineConstants::NoreturnPenalty; - + // If this function uses the coldcc calling convention, prefer not to inline // it. if (Callee->getCallingConv() == CallingConv::Cold) Bonus += InlineConstants::ColdccPenalty; - + // Add to the inline quality for properties that make the call valuable to // inline. This includes factors that indicate that the result of inlining // the function will be optimizable. Currently this just looks at arguments @@ -455,7 +463,7 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { // Compute any constant bonus due to inlining we want to give here. if (isa<Constant>(I)) Bonus += CountBonusForConstant(FI, cast<Constant>(I)); - + return Bonus; } @@ -483,10 +491,10 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); // If we should never inline this, return a huge cost. if (CalleeFI->NeverInline()) @@ -498,15 +506,15 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, // requires handling setjmp somewhere else, however. if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getAlways(); - + if (CalleeFI->Metrics.usesDynamicAlloca) { // Get information about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. if (CallerFI.Metrics.NumBlocks == 0) { - CallerFI.analyzeFunction(Caller); - + CallerFI.analyzeFunction(Caller, TD); + // Recompute the CalleeFI pointer, getting Caller could have invalidated // it. CalleeFI = &CachedFunctionInfo[Callee]; @@ -538,16 +546,16 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, // something else. if (Callee->mayBeOverridden()) return llvm::InlineCost::getNever(); - + // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee); + CalleeFI->analyzeFunction(Callee, TD); int Cost = 0; - + // Look at the original size of the callee. Each instruction counts as 5. Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; @@ -564,13 +572,13 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, // higher threshold to determine if the function call should be inlined. float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { Function *Callee = CS.getCalledFunction(); - + // Get information about the callee. FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; - + // If we haven't calculated this information yet, do so now. if (CalleeFI.Metrics.NumBlocks == 0) - CalleeFI.analyzeFunction(Callee); + CalleeFI.analyzeFunction(Callee, TD); float Factor = 1.0f; // Single BB functions are often written to be inlined. 
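// --- [Editor's sketch; not part of the patch] -------------------------------
// Every analyzeFunction call in this file now threads TargetData through so
// the metrics can apply the trunc rule above. Direct, cache-free use of the
// same interface (the threshold below is arbitrary):
static bool looksInlinable(llvm::Function *F, const llvm::TargetData *TD) {
  llvm::CodeMetrics Metrics;
  Metrics.analyzeFunction(F, TD); // TD may be null
  return !Metrics.callsSetJmp && !Metrics.containsIndirectBr &&
         Metrics.NumInsts < 100;
}
// ----------------------------------------------------------------------------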
@@ -604,7 +612,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { --CallerMetrics.NumCalls; if (Callee == 0) return; - + CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; // If we don't have metrics for the callee, don't recalculate them just to @@ -614,7 +622,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { resetCachedCostInfo(Caller); return; } - + // Since CalleeMetrics were already calculated, we know that the CallerMetrics // reference isn't invalidated: both were in the DenseMap. CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; @@ -636,7 +644,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { CallerMetrics.NumInsts -= Callee->arg_size(); else CallerMetrics.NumInsts = 0; - + // We are not updating the argument weights. We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 8709f6b..131cc97 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -48,6 +48,26 @@ static Value *SimplifyOrInst(Value *, Value *, const TargetData *, static Value *SimplifyXorInst(Value *, Value *, const TargetData *, const DominatorTree *, unsigned); +/// getFalse - For a boolean type, or a vector of boolean type, return false, or +/// a vector with every element false, as appropriate for the type. +static Constant *getFalse(Type *Ty) { + assert((Ty->isIntegerTy(1) || + (Ty->isVectorTy() && + cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + "Expected i1 type or a vector of i1!"); + return Constant::getNullValue(Ty); +} + +/// getTrue - For a boolean type, or a vector of boolean type, return true, or +/// a vector with every element true, as appropriate for the type. +static Constant *getTrue(Type *Ty) { + assert((Ty->isIntegerTy(1) || + (Ty->isVectorTy() && + cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) && + "Expected i1 type or a vector of i1!"); + return Constant::getAllOnesValue(Ty); +} + /// ValueDominatesPHI - Does the given value dominate the specified phi node? static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast<Instruction>(V); @@ -526,7 +546,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -595,7 +615,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // X - undef -> undef @@ -715,7 +735,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. 
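The getTrue/getFalse helpers added above exist because an icmp over vector operands must fold to a vector of i1, which ConstantInt::getTrue/getFalse cannot produce; Constant::getAllOnesValue/getNullValue cover both shapes, and the helpers add a type assertion on top. A usage sketch (isAlwaysTrue stands in for whatever condition a simplifier has proven):

    Type *ITy = CmpInst::makeCmpResultType(LHS->getType());
    if (isAlwaysTrue)      // hypothetical proven condition
      return getTrue(ITy); // i1 true, or a splat of true for vector compares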
@@ -788,7 +808,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -909,7 +929,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -1012,7 +1032,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD); } } @@ -1138,7 +1158,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -1227,7 +1247,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -1321,7 +1341,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, 2, TD); + Ops, TD); } // Canonicalize the constant to the RHS. @@ -1372,7 +1392,7 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); } -static const Type *GetCompareTy(Value *Op) { +static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -1413,8 +1433,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Pred = CmpInst::getSwappedPredicate(Pred); } - const Type *ITy = GetCompareTy(LHS); // The return type. - const Type *OpTy = LHS->getType(); // The operand type. + Type *ITy = GetCompareTy(LHS); // The return type. + Type *OpTy = LHS->getType(); // The operand type. // icmp X, X -> true/false // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false @@ -1478,48 +1498,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: assert(false && "Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: - // getNullValue also works for vectors, unlike getFalse. - return Constant::getNullValue(ITy); + return getFalse(ITy); case ICmpInst::ICMP_UGE: - // getAllOnesValue also works for vectors, unlike getTrue. 
- return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: if (isKnownNonZero(LHS, TD)) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; case ICmpInst::ICMP_SLT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); if (LHSKnownNonNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return Constant::getNullValue(ITy); + return getFalse(ITy); break; case ICmpInst::ICMP_SGE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); if (LHSKnownNonNegative) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; case ICmpInst::ICMP_SGT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return Constant::getNullValue(ITy); + return getFalse(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getAllOnesValue(ITy); + return getTrue(ITy); break; } } @@ -1593,8 +1611,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) { Instruction *LI = cast<CastInst>(LHS); Value *SrcOp = LI->getOperand(0); - const Type *SrcTy = SrcOp->getType(); - const Type *DstTy = LI->getType(); + Type *SrcTy = SrcOp->getType(); + Type *DstTy = LI->getType(); // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. @@ -1811,8 +1829,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - // getNullValue also works for vectors, unlike getFalse. - return Constant::getNullValue(ITy); + return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); @@ -1822,8 +1839,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - // getAllOnesValue also works for vectors, unlike getTrue. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); } } if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { @@ -1840,8 +1856,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - // getAllOnesValue also works for vectors, unlike getTrue. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); @@ -1851,8 +1866,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - // getNullValue also works for vectors, unlike getFalse. 
- return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -1874,7 +1888,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; break; case Instruction::Shl: { - bool NUW = LBO->hasNoUnsignedWrap() && LBO->hasNoUnsignedWrap(); + bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); if (!NUW && !NSW) break; @@ -1955,10 +1969,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } case CmpInst::ICMP_SGE: // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case CmpInst::ICMP_SLT: // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -2025,10 +2039,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } case CmpInst::ICMP_UGE: // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); case CmpInst::ICMP_ULT: // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } } @@ -2040,40 +2054,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_SGE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_SLT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && match(RHS, m_SMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). if (Pred == CmpInst::ICMP_SLE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_SGT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && match(RHS, m_UMin(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // max(x, ?) pred min(x, ?). if (Pred == CmpInst::ICMP_UGE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_ULT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && match(RHS, m_UMax(m_Value(C), m_Value(D))) && (A == C || A == D || B == C || B == D)) { // min(x, ?) pred max(x, ?). if (Pred == CmpInst::ICMP_ULE) // Always true. - return Constant::getAllOnesValue(ITy); + return getTrue(ITy); if (Pred == CmpInst::ICMP_UGT) // Always false. - return Constant::getNullValue(ITy); + return getFalse(ITy); } // If the comparison is with the result of a select instruction, check whether @@ -2219,43 +2233,71 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, const DominatorTree *) { // The type of the GEP pointer operand. - const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); // getelementptr P -> P. - if (NumOps == 1) + if (Ops.size() == 1) return Ops[0]; if (isa<UndefValue>(Ops[0])) { // Compute the (pointer) type returned by the GEP instruction. 
- const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1], - NumOps-1); - const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); + Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); return UndefValue::get(GEPTy); } - if (NumOps == 2) { + if (Ops.size() == 2) { // getelementptr P, 0 -> P. if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1])) if (C->isZero()) return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. if (TD) { - const Type *Ty = PtrTy->getElementType(); + Type *Ty = PtrTy->getElementType(); if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) return Ops[0]; } } // Check to see if this is constant foldable. - for (unsigned i = 0; i != NumOps; ++i) + for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa<Constant>(Ops[i])) return 0; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), - (Constant *const*)Ops+1, NumOps-1); + return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); +} + +/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we +/// can fold the result. If not, this returns null. +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const TargetData *, + const DominatorTree *) { + if (Constant *CAgg = dyn_cast<Constant>(Agg)) + if (Constant *CVal = dyn_cast<Constant>(Val)) + return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); + + // insertvalue x, undef, n -> x + if (match(Val, m_Undef())) + return Agg; + + // insertvalue x, (extractvalue y, n), n + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val)) + if (EV->getAggregateOperand()->getType() == Agg->getType() && + EV->getIndices() == Idxs) { + // insertvalue undef, (extractvalue y, n), n -> y + if (match(Agg, m_Undef())) + return EV->getAggregateOperand(); + + // insertvalue y, (extractvalue y, n), n -> y + if (Agg == EV->getAggregateOperand()) + return Agg; + } + + return 0; } /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. @@ -2328,7 +2370,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD); } // If the operation is associative, try some generic simplifications. 
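SimplifyInsertValueInst above leans on the PatternMatch helpers (match, m_Undef) that this file uses throughout. A small self-contained example of the idiom, independent of this patch: match() destructures IR values, m_Value captures an operand, and m_Zero matches any zero constant, so the fragment below folds 'add X, 0' to X:

    using namespace llvm::PatternMatch;
    Value *X;
    if (match(V, m_Add(m_Value(X), m_Zero())))
      return X; // V was 'add X, 0'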
@@ -2456,7 +2498,14 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT); + Result = SimplifyGEPInst(Ops, TD, DT); + break; + } + case Instruction::InsertValue: { + InsertValueInst *IV = cast<InsertValueInst>(I); + Result = SimplifyInsertValueInst(IV->getAggregateOperand(), + IV->getInsertedValueOperand(), + IV->getIndices(), TD, DT); break; } case Instruction::PHI: diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 6e27597..f80595c 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -630,7 +630,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, if (BB == &BB->getParent()->getEntryBlock()) { assert(isa<Argument>(Val) && "Unknown live-in to the entry block"); if (NotNull) { - const PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } else { Result.markOverdefined(); @@ -658,7 +658,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, // If we previously determined that this is a pointer that can't be null // then return that rather than giving up entirely. if (NotNull) { - const PointerType *PTy = cast<PointerType>(Val->getType()); + PointerType *PTy = cast<PointerType>(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } @@ -728,7 +728,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); - const IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); + IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); if (isa<BinaryOperator>(BBI)) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) { RHSRange = ConstantRange(RHS->getValue()); diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 89755da..38d677d 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -71,7 +71,7 @@ namespace { void visitCallSite(CallSite CS); void visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, - const Type *Ty, unsigned Flags); + Type *Ty, unsigned Flags); void visitCallInst(CallInst &I); void visitInvokeInst(InvokeInst &I); @@ -201,7 +201,7 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: Caller and callee calling convention differ", &I); - const FunctionType *FT = F->getFunctionType(); + FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); Assert1(FT->isVarArg() ? @@ -240,7 +240,7 @@ void Lint::visitCallSite(CallSite CS) { // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { - const Type *Ty = + Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), TD ? TD->getABITypeAlignment(Ty) : 0, @@ -364,7 +364,7 @@ void Lint::visitReturnInst(ReturnInst &I) { // TODO: Check readnone/readonly function attributes. void Lint::visitMemoryReference(Instruction &I, Value *Ptr, uint64_t Size, unsigned Align, - const Type *Ty, unsigned Flags) { + Type *Ty, unsigned Flags) { // If no memory is being referenced, it doesn't matter if the pointer // is valid. 
if (Size == 0) diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index c5c676b..0e6bcbf 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -63,7 +63,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, return V; SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end()); ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); @@ -90,7 +90,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (TD) Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); - const Type *BaseType = 0; + Type *BaseType = 0; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as long as it is suitably aligned. @@ -114,7 +114,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, return true; // Loading directly from an alloca or global is OK. // Check if the load is within the bounds of the underlying object. - const PointerType *AddrTy = cast<PointerType>(V->getType()); + PointerType *AddrTy = cast<PointerType>(V->getType()); uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0) @@ -169,7 +169,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // If we're using alias analysis to disambiguate, get the size of *Ptr. uint64_t AccessSize = 0; if (AA) { - const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); + Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); AccessSize = AA->getTypeStoreSize(AccessTy); } @@ -188,12 +188,16 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, --ScanFrom; // If this is a load of Ptr, the loaded value is available. + // (This is true even if the load is volatile or atomic, although + // those cases are unlikely.) if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) return LI; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If this is a store through Ptr, the value is available! + // (This is true even if the store is volatile or atomic, although + // those cases are unlikely.) if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) return SI->getOperand(0); diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index c1afe8f..3997ac4 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -76,7 +76,13 @@ static void GetMemRefInstrs(const Loop *L, } static bool IsLoadOrStoreInst(Value *I) { - return isa<LoadInst>(I) || isa<StoreInst>(I); + // Returns true if the load or store can be analyzed. Atomic and volatile + // operations have properties which this analysis does not understand.
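+ // (isUnordered() admits only non-volatile accesses whose atomic ordering is + // at most "unordered"; monotonic or stronger operations are rejected here.)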
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->isUnordered(); + else if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->isUnordered(); + return false; } static Value *GetPointerOperand(Value *I) { diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 0583140..85aacca 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopIterator.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" @@ -55,12 +56,12 @@ bool Loop::isLoopInvariant(Value *V) const { } /// hasLoopInvariantOperands - Return true if all the operands of the -/// specified instruction are loop invariant. +/// specified instruction are loop invariant. bool Loop::hasLoopInvariantOperands(Instruction *I) const { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!isLoopInvariant(I->getOperand(i))) return false; - + return true; } @@ -98,6 +99,9 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return false; if (I->mayReadFromMemory()) return false; + // The landingpad instruction is immobile. + if (isa<LandingPadInst>(I)) + return false; // Determine the insertion point, unless one was given. if (!InsertPt) { BasicBlock *Preheader = getLoopPreheader(); @@ -110,7 +114,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) return false; - + // Hoist. I->moveBefore(InsertPt); Changed = true; @@ -383,6 +387,205 @@ void Loop::dump() const { } //===----------------------------------------------------------------------===// +// UnloopUpdater implementation +// + +namespace { +/// Find the new parent loop for all blocks within the "unloop" whose last +/// backedge has just been removed. +class UnloopUpdater { + Loop *Unloop; + LoopInfo *LI; + + LoopBlocksDFS DFS; + + // Map unloop's immediate subloops to their nearest reachable parents. Nested + // loops within these subloops will not change parents. However, an immediate + // subloop's new parent will be the nearest loop reachable from either its own + // exits *or* any of its nested loops' exits. + DenseMap<Loop*, Loop*> SubloopParents; + + // Flag the presence of an irreducible backedge whose destination is a block + // directly contained by the original unloop. + bool FoundIB; + +public: + UnloopUpdater(Loop *UL, LoopInfo *LInfo) : + Unloop(UL), LI(LInfo), DFS(UL), FoundIB(false) {} + + void updateBlockParents(); + + void removeBlocksFromAncestors(); + + void updateSubloopParents(); + +protected: + Loop *getNearestLoop(BasicBlock *BB, Loop *BBLoop); +}; +} // end anonymous namespace + +/// updateBlockParents - Update the parent loop for all blocks that are directly +/// contained within the original "unloop". +void UnloopUpdater::updateBlockParents() { + if (Unloop->getNumBlocks()) { + // Perform a post order CFG traversal of all blocks within this loop, + // propagating the nearest loop from successors to predecessors. + LoopBlocksTraversal Traversal(DFS, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + + if (NL != L) { + // For reducible loops, NL is now an ancestor of Unloop.
+ assert((NL != Unloop && (!NL || NL->contains(Unloop))) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + } + else { + // Or the current block is part of a subloop, in which case its parent + // is unchanged. + assert((FoundIB || Unloop->contains(L)) && "uninitialized successor"); + } + } + } + // Each irreducible loop within the unloop induces a round of iteration using + // the DFS result cached by Traversal. + bool Changed = FoundIB; + for (unsigned NIters = 0; Changed; ++NIters) { + assert(NIters < Unloop->getNumBlocks() && "runaway iterative algorithm"); + + // Iterate over the postorder list of blocks, propagating the nearest loop + // from successors to predecessors as before. + Changed = false; + for (LoopBlocksDFS::POIterator POI = DFS.beginPostorder(), + POE = DFS.endPostorder(); POI != POE; ++POI) { + + Loop *L = LI->getLoopFor(*POI); + Loop *NL = getNearestLoop(*POI, L); + if (NL != L) { + assert(NL != Unloop && (!NL || NL->contains(Unloop)) && + "uninitialized successor"); + LI->changeLoopFor(*POI, NL); + Changed = true; + } + } + } +} + +/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below +/// their new parents. +void UnloopUpdater::removeBlocksFromAncestors() { + // Remove unloop's blocks from all ancestors below their new parents. + for (Loop::block_iterator BI = Unloop->block_begin(), + BE = Unloop->block_end(); BI != BE; ++BI) { + Loop *NewParent = LI->getLoopFor(*BI); + // If this block is an immediate subloop, remove all blocks (including + // nested subloops) from ancestors below the new parent loop. + // Otherwise, if this block is in a nested subloop, skip it. + if (SubloopParents.count(NewParent)) + NewParent = SubloopParents[NewParent]; + else if (Unloop->contains(NewParent)) + continue; + + // Remove blocks from former Ancestors except Unloop itself which will be + // deleted. + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent; + OldParent = OldParent->getParentLoop()) { + assert(OldParent && "new loop is not an ancestor of the original"); + OldParent->removeBlockFromLoop(*BI); + } + } +} + +/// updateSubloopParents - Update the parent loop for all subloops directly +/// nested within unloop. +void UnloopUpdater::updateSubloopParents() { + while (!Unloop->empty()) { + Loop *Subloop = *llvm::prior(Unloop->end()); + Unloop->removeChildLoop(llvm::prior(Unloop->end())); + + assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop"); + if (SubloopParents[Subloop]) + SubloopParents[Subloop]->addChildLoop(Subloop); + else + LI->addTopLevelLoop(Subloop); + } +} + +/// getNearestLoop - Return the nearest parent loop among this block's +/// successors. If a successor is a subloop header, consider its parent to be +/// the nearest parent of the subloop's exits. +/// +/// For subloop blocks, simply update SubloopParents and return NULL. +Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { + + // Initially for blocks directly contained by Unloop, NearLoop == Unloop and + // is considered uninitialized. + Loop *NearLoop = BBLoop; + + Loop *Subloop = 0; + if (NearLoop != Unloop && Unloop->contains(NearLoop)) { + Subloop = NearLoop; + // Find the subloop ancestor that is directly contained within Unloop. + while (Subloop->getParentLoop() != Unloop) { + Subloop = Subloop->getParentLoop(); + assert(Subloop && "subloop is not an ancestor of the original loop"); + } + // Get the current nearest parent of the Subloop exits, initially Unloop. 
+ if (!SubloopParents.count(Subloop)) + SubloopParents[Subloop] = Unloop; + NearLoop = SubloopParents[Subloop]; + } + + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) { + assert(!Subloop && "subloop blocks must have a successor"); + NearLoop = 0; // unloop blocks may now exit the function. + } + for (; I != E; ++I) { + if (*I == BB) + continue; // self loops are uninteresting + + Loop *L = LI->getLoopFor(*I); + if (L == Unloop) { + // This successor has not been processed. This path must lead to an + // irreducible backedge. + assert((FoundIB || !DFS.hasPostorder(*I)) && "should have seen IB"); + FoundIB = true; + } + if (L != Unloop && Unloop->contains(L)) { + // Successor is in a subloop. + if (Subloop) + continue; // Branching within subloops. Ignore it. + + // BB branches from the original into a subloop header. + assert(L->getParentLoop() == Unloop && "cannot skip into nested loops"); + + // Get the current nearest parent of the Subloop's exits. + L = SubloopParents[L]; + // L could be Unloop if the only exit was an irreducible backedge. + } + if (L == Unloop) { + continue; + } + // Handle critical edges from Unloop into a sibling loop. + if (L && !L->contains(Unloop)) { + L = L->getParentLoop(); + } + // Remember the nearest parent loop among successors or subloop exits. + if (NearLoop == Unloop || !NearLoop || NearLoop->contains(L)) + NearLoop = L; + } + if (Subloop) { + SubloopParents[Subloop] = NearLoop; + return BBLoop; + } + return NearLoop; +} + +//===----------------------------------------------------------------------===// // LoopInfo implementation // bool LoopInfo::runOnFunction(Function &) { @@ -391,6 +594,68 @@ bool LoopInfo::runOnFunction(Function &) { return false; } +/// updateUnloop - The last backedge has been removed from a loop--now the +/// "unloop". Find a new parent for the blocks contained within unloop and +/// update the loop tree. We don't necessarily have valid dominators at this +/// point, but LoopInfo is still valid except for the removal of this loop. +/// +/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without +/// checking first is illegal. +void LoopInfo::updateUnloop(Loop *Unloop) { + + // First handle the special case of no parent loop to simplify the algorithm. + if (!Unloop->getParentLoop()) { + // Since BBLoop had no parent, Unloop blocks are no longer in a loop. + for (Loop::block_iterator I = Unloop->block_begin(), + E = Unloop->block_end(); I != E; ++I) { + + // Don't reparent blocks in subloops. + if (getLoopFor(*I) != Unloop) + continue; + + // Blocks no longer have a parent but are still referenced by Unloop until + // the Unloop object is deleted. + LI.changeLoopFor(*I, 0); + } + + // Remove the loop from the top-level LoopInfo object. + for (LoopInfo::iterator I = LI.begin();; ++I) { + assert(I != LI.end() && "Couldn't find loop"); + if (*I == Unloop) { + LI.removeLoop(I); + break; + } + } + + // Move all of the subloops to the top-level. + while (!Unloop->empty()) + LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end()))); + + return; + } + + // Update the parent loop for all blocks within the loop. Blocks within + // subloops will not change parents. + UnloopUpdater Updater(Unloop, this); + Updater.updateBlockParents(); + + // Remove blocks from former ancestor loops. + Updater.removeBlocksFromAncestors(); + + // Add direct subloops as children in their new parent loop. + Updater.updateSubloopParents(); + + // Remove unloop from its parent loop. 
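+ // The parent is known to be non-null here: the no-parent case was handled by + // the early return at the top of updateUnloop.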
+ Loop *ParentLoop = Unloop->getParentLoop(); + for (Loop::iterator I = ParentLoop->begin();; ++I) { + assert(I != ParentLoop->end() && "Couldn't find loop"); + if (*I == Unloop) { + ParentLoop->removeChildLoop(I); + break; + } + } +} + void LoopInfo::verifyAnalysis() const { // LoopInfo is a FunctionPass, but verifying every loop in the function // each time verifyAnalysis is called is very expensive. The @@ -400,12 +665,21 @@ void LoopInfo::verifyAnalysis() const { if (!VerifyLoopInfo) return; + DenseSet<const Loop*> Loops; for (iterator I = begin(), E = end(); I != E; ++I) { assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); - (*I)->verifyLoopNest(); + (*I)->verifyLoopNest(&Loops); } - // TODO: check BBMap consistency. + // Verify that blocks are mapped to valid loops. + // + // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we + // could also verify that the blocks are still in the correct loops. + for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(), + E = LI.BBMap.end(); I != E; ++I) { + assert(Loops.count(I->second) && "orphaned loop"); + assert(I->second->contains(I->first) && "orphaned block"); + } } void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { @@ -417,3 +691,15 @@ void LoopInfo::print(raw_ostream &OS, const Module*) const { LI.print(OS); } +//===----------------------------------------------------------------------===// +// LoopBlocksDFS implementation +// + +/// Traverse the loop blocks and store the DFS result. +/// Useful for clients that just want the final DFS result and don't need to +/// visit blocks during the initial traversal. +void LoopBlocksDFS::perform(LoopInfo *LI) { + LoopBlocksTraversal Traversal(*this, LI); + for (LoopBlocksTraversal::POTIterator POI = Traversal.begin(), + POE = Traversal.end(); POI != POE; ++POI) ; +} diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 10e3f29..5ba1f40 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -59,9 +59,9 @@ char PrintLoopPass::ID = 0; static DebugInfoProbeInfo *TheDebugProbe; static void createDebugInfoProbe() { if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, // before static globals, thus it will be destroyed before them. static ManagedStatic<DebugInfoProbeInfo> DIP; TheDebugProbe = &*DIP; @@ -73,73 +73,29 @@ static void createDebugInfoProbe() { char LPPassManager::ID = 0; -LPPassManager::LPPassManager(int Depth) - : FunctionPass(ID), PMDataManager(Depth) { +LPPassManager::LPPassManager() + : FunctionPass(ID), PMDataManager() { skipThisLoop = false; redoThisLoop = false; LI = NULL; CurrentLoop = NULL; } -/// Delete loop from the loop queue and loop hierarchy (LoopInfo). +/// Delete loop from the loop queue and loop hierarchy (LoopInfo). void LPPassManager::deleteLoopFromQueue(Loop *L) { - if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop. - // Reparent all of the blocks in this loop. Since BBLoop had a parent, - // they are now all in it. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - if (LI->getLoopFor(*I) == L) // Don't change blocks in subloops. - LI->changeLoopFor(*I, ParentLoop); - - // Remove the loop from its parent loop. 
- for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();; - ++I) { - assert(I != E && "Couldn't find loop"); - if (*I == L) { - ParentLoop->removeChildLoop(I); - break; - } - } - - // Move all subloops into the parent loop. - while (!L->empty()) - ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1)); - } else { - // Reparent all of the blocks in this loop. Since BBLoop had no parent, - // they no longer in a loop at all. - - for (unsigned i = 0; i != L->getBlocks().size(); ++i) { - // Don't change blocks in subloops. - if (LI->getLoopFor(L->getBlocks()[i]) == L) { - LI->removeBlock(L->getBlocks()[i]); - --i; - } - } - - // Remove the loop from the top-level LoopInfo object. - for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) { - assert(I != E && "Couldn't find loop"); - if (*I == L) { - LI->removeLoop(I); - break; - } - } - - // Move all of the subloops to the top-level. - while (!L->empty()) - LI->addTopLevelLoop(L->removeChildLoop(L->end()-1)); - } - - delete L; + LI->updateUnloop(L); // If L is current loop then skip rest of the passes and let // runOnFunction remove L from LQ. Otherwise, remove L from LQ now // and continue applying other passes on CurrentLoop. - if (CurrentLoop == L) { + if (CurrentLoop == L) skipThisLoop = true; + + delete L; + + if (skipThisLoop) return; - } for (std::deque<Loop *>::iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { @@ -166,10 +122,10 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { void LPPassManager::insertLoopIntoQueue(Loop *L) { // Insert L into loop queue - if (L == CurrentLoop) + if (L == CurrentLoop) redoLoop(L); else if (!L->getParentLoop()) - // This is top level loop. + // This is top level loop. LQ.push_front(L); else { // Insert L after the parent loop. @@ -195,9 +151,9 @@ void LPPassManager::redoLoop(Loop *L) { /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for /// all loop passes. -void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, +void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *LP = getContainedPass(Index); LP->cloneBasicBlockAnalysis(From, To, L); } @@ -206,13 +162,13 @@ void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From, /// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes. void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { Instruction &I = *BI; deleteSimpleAnalysisValue(&I, L); } } - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *LP = getContainedPass(Index); LP->deleteAnalysisValue(V, L); } @@ -228,7 +184,7 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) { /// Pass Manager itself does not invalidate any analysis info. void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { - // LPPassManager needs LoopInfo. In the long term LoopInfo class will + // LPPassManager needs LoopInfo. In the long term LoopInfo class will // become part of LPPassManager. 
Info.addRequired<LoopInfo>(); Info.setPreservesAll(); @@ -255,7 +211,7 @@ bool LPPassManager::runOnFunction(Function &F) { for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { Loop *L = *I; - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); Changed |= P->doInitialization(L, *this); } @@ -263,13 +219,13 @@ bool LPPassManager::runOnFunction(Function &F) { // Walk Loops while (!LQ.empty()) { - + CurrentLoop = LQ.back(); skipThisLoop = false; redoThisLoop = false; // Run all passes on the current Loop. - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); @@ -319,23 +275,23 @@ bool LPPassManager::runOnFunction(Function &F) { // Do not run other passes on this loop. break; } - + // If the loop was deleted, release all the loop passes. This frees up // some memory, and avoids trouble with the pass manager trying to call // verifyAnalysis on them. if (skipThisLoop) - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); freePass(P, "<deleted>", ON_LOOP_MSG); } // Pop the loop from queue after running all passes. LQ.pop_back(); - + if (redoThisLoop) LQ.push_back(CurrentLoop); } - + // Finalization for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); @@ -372,7 +328,7 @@ Pass *LoopPass::createPrinterPass(raw_ostream &O, // LPPassManger as expected. void LoopPass::preparePassManager(PMStack &PMS) { - // Find LPPassManager + // Find LPPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_LoopPassManager) PMS.pop(); @@ -381,14 +337,14 @@ void LoopPass::preparePassManager(PMStack &PMS) { // by other passes that are managed by LPM then do not insert // this pass in current LPM. Use new LPPassManager. if (PMS.top()->getPassManagerType() == PMT_LoopPassManager && - !PMS.top()->preserveHigherLevelAnalysis(this)) + !PMS.top()->preserveHigherLevelAnalysis(this)) PMS.pop(); } /// Assign pass manager to manage this pass. void LoopPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { - // Find LPPassManager + // Find LPPassManager while (!PMS.empty() && PMS.top()->getPassManagerType() > PMT_LoopPassManager) PMS.pop(); @@ -397,12 +353,12 @@ void LoopPass::assignPassManager(PMStack &PMS, if (PMS.top()->getPassManagerType() == PMT_LoopPassManager) LPPM = (LPPassManager*)PMS.top(); else { - // Create new Loop Pass Manager if it does not exist. + // Create new Loop Pass Manager if it does not exist. 
assert (!PMS.empty() && "Unable to create Loop Pass Manager"); PMDataManager *PMD = PMS.top(); - // [1] Create new Call Graph Pass Manager - LPPM = new LPPassManager(PMD->getDepth() + 1); + // [1] Create new Loop Pass Manager + LPPM = new LPPassManager(); LPPM->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 2283db0..fde07ea 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -25,8 +25,17 @@ namespace { struct MemDepPrinter : public FunctionPass { const Function *F; - typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag; - typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep; + enum DepType { + Clobber = 0, + Def, + NonFuncLocal, + Unknown + }; + + static const char* DepTypeStr[]; + + typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair; + typedef std::pair<InstTypePair, const BasicBlock *> Dep; typedef SmallSetVector<Dep, 4> DepSet; typedef DenseMap<const Instruction *, DepSet> DepSetMap; DepSetMap Deps; @@ -50,6 +59,21 @@ namespace { Deps.clear(); F = 0; } + + private: + static InstTypePair getInstTypePair(MemDepResult dep) { + if (dep.isClobber()) + return InstTypePair(dep.getInst(), Clobber); + if (dep.isDef()) + return InstTypePair(dep.getInst(), Def); + if (dep.isNonFuncLocal()) + return InstTypePair(dep.getInst(), NonFuncLocal); + assert(dep.isUnknown() && "unexpected dependence type"); + return InstTypePair(dep.getInst(), Unknown); + } + static InstTypePair getInstTypePair(const Instruction* inst, DepType type) { + return InstTypePair(inst, type); + } }; } @@ -64,6 +88,9 @@ FunctionPass *llvm::createMemDepPrinter() { return new MemDepPrinter(); } +const char* MemDepPrinter::DepTypeStr[] + = {"Clobber", "Def", "NonFuncLocal", "Unknown"}; + bool MemDepPrinter::runOnFunction(Function &F) { this->F = &F; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); @@ -79,10 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { - assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && - "Local dep should be unknown, def or clobber!"); - Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), + Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(0))); } else if (CallSite CS = cast<Value>(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = @@ -92,22 +116,26 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && - "Resolved non-local call dep should be unknown, def or " - "clobber!"); - InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), - I->getBB())); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); } } else { SmallVector<NonLocalDepResult, 4> NLDI; if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - // FIXME: Volatile is not handled properly here. + if (!LI->isUnordered()) { + // FIXME: Handle atomic/volatile loads.
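+ // Record a local Unknown dependence so the instruction still receives an + // entry in the printed output before being skipped.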
+ Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } AliasAnalysis::Location Loc = AA.getLocation(LI); - MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(), - LI->getParent(), NLDI); + MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // FIXME: Volatile is not handled properly here. + if (!SI->isUnordered()) { + // FIXME: Handle atomic/volatile stores. + Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), + static_cast<BasicBlock *>(0))); + continue; + } AliasAnalysis::Location Loc = AA.getLocation(SI); MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI); } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { @@ -121,11 +149,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (SmallVectorImpl<NonLocalDepResult>::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert(Res.isClobber() != Res.isDef() && - "Resolved non-local pointer dep should be def or clobber!"); - InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), - Res.isClobber()), - I->getBB())); + InstDeps.insert(std::make_pair(getInstTypePair(Res), I->getBB())); } } } @@ -146,26 +170,18 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end(); I != E; ++I) { const Instruction *DepInst = I->first.getPointer(); - bool isClobber = I->first.getInt(); + DepType type = I->first.getInt(); const BasicBlock *DepBB = I->second; OS << " "; - if (!DepInst) - OS << "Unknown"; - else if (isClobber) - OS << "Clobber"; - else - OS << " Def"; + OS << DepTypeStr[type]; if (DepBB) { OS << " in block "; WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); } if (DepInst) { OS << " from: "; - if (DepInst == Inst) - OS << "<unspecified>"; - else - DepInst->print(OS); + DepInst->print(OS); } OS << "\n"; } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 53d4304..8d451c4 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -47,7 +47,7 @@ static bool isMallocCall(const CallInst *CI) { // Check malloc prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. - const FunctionType *FTy = Callee->getFunctionType(); + FunctionType *FTy = Callee->getFunctionType(); if (FTy->getNumParams() != 1) return false; return FTy->getParamType(0)->isIntegerTy(32) || @@ -94,12 +94,12 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, return NULL; // The size of the malloc's result type must be known to determine array size. - const Type *T = getMallocAllocatedType(CI); + Type *T = getMallocAllocatedType(CI); if (!T || !T->isSized() || !TD) return NULL; unsigned ElementSize = TD->getTypeAllocSize(T); - if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); // If malloc call's arg can be determined to be a multiple of ElementSize, @@ -133,10 +133,10 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { /// 0: PointerType is the calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL.
-const PointerType *llvm::getMallocType(const CallInst *CI) { +PointerType *llvm::getMallocType(const CallInst *CI) { assert(isMalloc(CI) && "getMallocType and not malloc call"); - const PointerType *MallocType = NULL; + PointerType *MallocType = NULL; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -164,8 +164,8 @@ const PointerType *llvm::getMallocType(const CallInst *CI) { /// 0: PointerType is the malloc calls' return type. /// 1: PointerType is the bitcast's result type. /// >1: Unique PointerType cannot be determined, return NULL. -const Type *llvm::getMallocAllocatedType(const CallInst *CI) { - const PointerType *PT = getMallocType(CI); +Type *llvm::getMallocAllocatedType(const CallInst *CI) { + PointerType *PT = getMallocType(CI); return PT ? PT->getElementType() : NULL; } @@ -201,7 +201,7 @@ const CallInst *llvm::isFreeCall(const Value *I) { // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. - const FunctionType *FTy = Callee->getFunctionType(); + FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) return 0; if (FTy->getNumParams() != 1) diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index bba4482..92967c0 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -120,21 +120,27 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, AliasAnalysis::Location &Loc, AliasAnalysis *AA) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - if (LI->isVolatile()) { - Loc = AliasAnalysis::Location(); + if (LI->isUnordered()) { + Loc = AA->getLocation(LI); + return AliasAnalysis::Ref; + } else if (LI->getOrdering() == Monotonic) { + Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } - Loc = AA->getLocation(LI); - return AliasAnalysis::Ref; + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (SI->isVolatile()) { - Loc = AliasAnalysis::Location(); + if (SI->isUnordered()) { + Loc = AA->getLocation(SI); + return AliasAnalysis::Mod; + } else if (SI->getOrdering() == Monotonic) { + Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } - Loc = AA->getLocation(SI); - return AliasAnalysis::Mod; + Loc = AliasAnalysis::Location(); + return AliasAnalysis::ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { @@ -232,7 +238,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getUnknown(); + return MemDepResult::getNonFuncLocal(); } /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that @@ -270,8 +276,8 @@ unsigned MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI, const TargetData &TD) { - // We can only extend non-volatile integer loads. - if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + // We can only extend simple integer loads. + if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0; // Get the base of this load. int64_t LIOffs = 0; @@ -369,6 +375,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Values depend on loads if the pointers are must aliased. 
This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + // Atomic loads have complications involved. + // FIXME: This is overly conservative. + if (!LI->isUnordered()) + return MemDepResult::getClobber(LI); + AliasAnalysis::Location LoadLoc = AA->getLocation(LI); // If we found a pointer, check if it could be the same as our pointer. @@ -382,7 +393,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // location is 1 byte at P+1). If so, return it as a load/load // clobber result, allowing the client to decide to widen the load if // it wants to. - if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) @@ -424,6 +435,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // Atomic stores have complications involved. + // FIXME: This is overly conservative. + if (!SI->isUnordered()) + return MemDepResult::getClobber(SI); + // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -483,7 +499,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getUnknown(); + return MemDepResult::getNonFuncLocal(); } /// getDependency - Return the instruction on which a memory operation @@ -516,7 +532,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { if (QueryParent != &QueryParent->getParent()->getEntryBlock()) LocalCache = MemDepResult::getNonLocal(); else - LocalCache = MemDepResult::getUnknown(); + LocalCache = MemDepResult::getNonFuncLocal(); } else { AliasAnalysis::Location MemLoc; AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); @@ -672,7 +688,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // a clobber, otherwise it is unknown. Dep = MemDepResult::getNonLocal(); } else { - Dep = MemDepResult::getUnknown(); + Dep = MemDepResult::getNonFuncLocal(); } // If we had a dirty entry for the block, update it. Otherwise, just add @@ -790,7 +806,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (Dep.isNonLocal() || Dep.isUnknown()) + if (!Dep.isDef() && !Dep.isClobber()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 70dcd0d..7e22ddc 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. 
- if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -407,9 +407,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, } GetElementPtrInst *Result = - GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); + GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); Result->setIsInBounds(GEP->isInBounds()); NewInsts.push_back(Result); return Result; diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 7c584da..0e3b6e6 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -387,7 +387,7 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) - || isa<UnwindInst>(terminator)) + || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator)) addEdge(currentNode, getExit(),0); currentNode->setColor(BallLarusNode::GRAY); diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 80eda79..3a3529b 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -27,8 +27,8 @@ using namespace llvm; char RGPassManager::ID = 0; -RGPassManager::RGPassManager(int Depth) - : FunctionPass(ID), PMDataManager(Depth) { +RGPassManager::RGPassManager() + : FunctionPass(ID), PMDataManager() { skipThisRegion = false; redoThisRegion = false; RI = NULL; @@ -250,7 +250,7 @@ void RegionPass::assignPassManager(PMStack &PMS, PMDataManager *PMD = PMS.top(); // [1] Create new Region Pass Manager - RGPM = new RGPassManager(PMD->getDepth() + 1); + RGPM = new RGPassManager(); RGPM->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 025718e..e0ac56c 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -197,7 +197,7 @@ void SCEV::print(raw_ostream &OS) const { } case scUnknown: { const SCEVUnknown *U = cast<SCEVUnknown>(this); - const Type *AllocTy; + Type *AllocTy; if (U->isSizeOf(AllocTy)) { OS << "sizeof(" << *AllocTy << ")"; return; @@ -207,7 +207,7 @@ void SCEV::print(raw_ostream &OS) const { return; } - const Type *CTy; + Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { OS << "offsetof(" << *CTy << ", "; @@ -228,7 +228,7 @@ void SCEV::print(raw_ostream &OS) const { llvm_unreachable("Unknown SCEV kind!"); } -const Type *SCEV::getType() const { +Type *SCEV::getType() const { switch (getSCEVType()) { case scConstant: return cast<SCEVConstant>(this)->getType(); @@ -297,17 +297,17 @@ const SCEV *ScalarEvolution::getConstant(const APInt& Val) { } const SCEV * -ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); +ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { + IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); return getConstant(ConstantInt::get(ITy, V, isSigned)); } SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, - unsigned SCEVTy, const SCEV *op, const Type *ty) + unsigned SCEVTy, const SCEV *op, Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} 
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -315,7 +315,7 @@ SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, } SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -323,7 +323,7 @@ SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, } SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, - const SCEV *op, const Type *ty) + const SCEV *op, Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && @@ -354,7 +354,7 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) { setValPtr(New); } -bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { +bool SCEVUnknown::isSizeOf(Type *&AllocTy) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) @@ -371,15 +371,15 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { return false; } -bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { +bool SCEVUnknown::isAlignOf(Type *&AllocTy) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) if (CE->getOpcode() == Instruction::GetElementPtr && CE->getOperand(0)->isNullValue()) { - const Type *Ty = + Type *Ty = cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); - if (const StructType *STy = dyn_cast<StructType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked() && CE->getNumOperands() == 3 && CE->getOperand(1)->isNullValue()) { @@ -396,7 +396,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { return false; } -bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { +bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) if (VCE->getOpcode() == Instruction::PtrToInt) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) @@ -404,7 +404,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { CE->getNumOperands() == 3 && CE->getOperand(0)->isNullValue() && CE->getOperand(1)->isNullValue()) { - const Type *Ty = + Type *Ty = cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); // Ignore vector types here so that ScalarEvolutionExpander doesn't // emit getelementptrs that index into vectors. @@ -652,7 +652,7 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, /// Assume, K > 0. static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, ScalarEvolution &SE, - const Type* ResultTy) { + Type *ResultTy) { // Handle the simplest case efficiently. 
if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); @@ -742,7 +742,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, MultiplyFactor = MultiplyFactor.trunc(W); // Calculate the product, at width T+W - const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + IntegerType *CalculationTy = IntegerType::get(SE.getContext(), CalculationBits); const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { @@ -790,7 +790,7 @@ const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, //===----------------------------------------------------------------------===// const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && @@ -877,7 +877,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, } const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -954,7 +954,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, ZMul); @@ -1062,7 +1062,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, // result, the expression "Step + sext(PreIncAR)" is congruent with // "sext(PostIncAR)" static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, - const Type *Ty, + Type *Ty, ScalarEvolution *SE) { const Loop *L = AR->getLoop(); const SCEV *Start = AR->getStart(); @@ -1070,14 +1070,26 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); - if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step) + if (!SA) + return 0; + + // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV + // subtraction is expensive. For this purpose, perform a quick and dirty + // difference, by checking for Step in the operand list. + SmallVector<const SCEV *, 4> DiffOps; + for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end(); + I != E; ++I) { + if (*I != Step) + DiffOps.push_back(*I); + } + if (DiffOps.size() == SA->getNumOperands()) return 0; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. // 1. NSW flags on the step increment. - const SCEV *PreStart = SA->getOperand(1); + const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1086,7 +1098,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // 2. Direct overflow check on the step operation's expression. 
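The DiffOps loop above is the patch's "quick and dirty" Start - Step: instead of building a full SCEV subtraction, it filters Step out of the add expression's operand list and treats what remains as PreStart. A sketch over plain integers (values illustrative; SCEV add operands are canonicalized, so Step matches at most one operand in practice):

#include <cstdio>
#include <vector>

int main() {
  std::vector<long> StartOps = {40, 2, 8}; // Start = 40 + 2 + 8, Step = 8
  long Step = 8;

  std::vector<long> DiffOps;
  for (unsigned i = 0; i != StartOps.size(); ++i)
    if (StartOps[i] != Step)
      DiffOps.push_back(StartOps[i]);

  if (DiffOps.size() == StartOps.size()) {
    std::printf("Step not found: not a post-increment start\n");
    return 0;
  }

  long PreStart = 0;
  for (unsigned i = 0; i != DiffOps.size(); ++i)
    PreStart += DiffOps[i];
  std::printf("PreStart = %ld\n", PreStart); // 42, i.e. 50 - 8
  return 0;
}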
unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); - const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), SE->getSignExtendExpr(Step, WideTy)); @@ -1112,7 +1124,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Get the normalized sign-extended expression for this AddRec's Start. static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, - const Type *Ty, + Type *Ty, ScalarEvolution *SE) { const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); if (!PreStart) @@ -1123,7 +1135,7 @@ static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, } const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1208,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, SMul); @@ -1275,7 +1287,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, /// unspecified bits out to the given type. /// const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, - const Type *Ty) { + Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1438,7 +1450,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); @@ -1488,7 +1500,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // Okay, check to see if the same value occurs in the operand list more than // once. If so, merge them together into an multiply expression. Since we // sorted the list, these values are required to be adjacent. - const Type *Ty = Ops[0]->getType(); + Type *Ty = Ops[0]->getType(); bool FoundMatch = false; for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 @@ -1515,8 +1527,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // if the contents of the resulting outer trunc fold to something simple. 
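The fold this comment introduces rests on trunc(A + B) == trunc(A) + trunc(B): truncation to iN is reduction mod 2^N, and reduction mod 2^N commutes with addition. A quick numeric check of the identity for an i32-to-i16 trunc:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t A = 0x12345678, B = 0x0000FFEE;
  uint16_t TruncOfSum = (uint16_t)(A + B);                     // 0x5666
  uint16_t SumOfTrunc = (uint16_t)((uint16_t)A + (uint16_t)B); // 0x5666
  std::printf("%#x %#x\n", TruncOfSum, SumOfTrunc);            // identical
  return 0;
}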
for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) { const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]); - const Type *DstType = Trunc->getType(); - const Type *SrcType = Trunc->getOperand()->getType(); + Type *DstType = Trunc->getType(); + Type *SrcType = Trunc->getOperand()->getType(); SmallVector<const SCEV *, 8> LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the @@ -1735,7 +1747,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; - // Otherwise, add the folded AddRec by the non-liv parts. + // Otherwise, add the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; @@ -1800,6 +1812,38 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, return S; } +static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { + uint64_t k = i*j; + if (j > 1 && k / j != i) Overflow = true; + return k; +} + +/// Compute the result of "n choose k", the binomial coefficient. If an +/// intermediate computation overflows, Overflow will be set and the return will +/// be garbage. Overflow is not cleared on absence of overflow. +static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { + // We use the multiplicative formula: + // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . + // At each iteration, we take the i-th term of the numerator and divide by the + // i-th smallest term of the denominator. This division will always produce an + // integral result, and helps reduce the chance of overflow in the + // intermediate computations. However, we can still overflow even when the + // final result would fit. + + if (n == 0 || n == k) return 1; + if (k > n) return 0; + + if (k > n/2) + k = n-k; + + uint64_t r = 1; + for (uint64_t i = 1; i <= k; ++i) { + r = umul_ov(r, n-(i-1), Overflow); + r /= i; + } + return r; +} + /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, @@ -1809,7 +1853,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVMulExpr operand types don't match!"); @@ -1960,7 +2004,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; - // Otherwise, multiply the folded AddRec by the non-liv parts. + // Otherwise, multiply the folded AddRec by the non-invariant parts. for (unsigned i = 0;; ++i) if (Ops[i] == AddRec) { Ops[i] = NewRec; @@ -1974,31 +2018,65 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // multiplied together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); - ++OtherIdx) + ++OtherIdx) { if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { - // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> --> - // {A*C,+,F*D + G*B + B*D}<L> + // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> + // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ + // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z + // ]]],+,...up to x=2n}. + // Note that the arguments to choose() are always integers with values + // known at compile time, never SCEV objects. + // + // The implementation avoids pointless extra computations when the two + // addrec's are of different length (mathematically, it's equivalent to + // an infinite stream of zeros on the right). + bool OpsModified = false; for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); ++OtherIdx) if (const SCEVAddRecExpr *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) if (OtherAddRec->getLoop() == AddRecLoop) { - const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); - const SCEV *B = F->getStepRecurrence(*this); - const SCEV *D = G->getStepRecurrence(*this); - const SCEV *NewStep = getAddExpr(getMulExpr(F, D), - getMulExpr(G, B), - getMulExpr(B, D)); - const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, - F->getLoop(), - SCEV::FlagAnyWrap); - if (Ops.size() == 2) return NewAddRec; - Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector<const SCEV*, 7> AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; + x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); + } + } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, + AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + } } - return getMulExpr(Ops); + if (OpsModified) + return getMulExpr(Ops); } + } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2042,21 +2120,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // Determine if the division can be folded into the operands of // its operands. // TODO: Generalize this to non-constants by using known-bits information. - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. 
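The Choose helper added above keeps intermediate values small by dividing as it multiplies, and reports (but never clears) overflow through the Overflow flag. A standalone copy demonstrating both behaviors:

#include <cstdint>
#include <cstdio>

static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
  uint64_t k = i * j;
  if (j > 1 && k / j != i) Overflow = true; // product did not fit in 64 bits
  return k;
}

static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
  if (n == 0 || n == k) return 1;
  if (k > n) return 0;
  if (k > n / 2) k = n - k;        // C(n,k) == C(n,n-k); use the smaller k
  uint64_t r = 1;
  for (uint64_t i = 1; i <= k; ++i) {
    r = umul_ov(r, n - (i - 1), Overflow); // next numerator term
    r /= i;                                // exact: r is C(n,i) afterwards
  }
  return r;
}

int main() {
  bool Overflow = false;
  std::printf("C(6,2) = %llu\n", (unsigned long long)Choose(6, 2, Overflow)); // 15
  std::printf("overflow: %d\n", Overflow);                                    // 0
  Choose(80, 40, Overflow);  // intermediate products exceed 64 bits
  std::printf("overflow: %d\n", Overflow);                                    // 1
  return 0;
}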
if (!RHSC->getValue()->getValue().isPowerOf2()) ++MaxShiftAmt; - const IntegerType *ExtTy = + IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); - // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) if (const SCEVConstant *Step = - dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) - if (!Step->getValue()->getValue() - .urem(RHSC->getValue()->getValue()) && + dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) { + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. + const APInt &StepInt = Step->getValue()->getValue(); + const APInt &DivInt = RHSC->getValue()->getValue(); + if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), @@ -2067,6 +2146,22 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } + /// Get a canonical UDivExpr for a recurrence. + /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. + // We can currently only fold X%N if X is constant. + const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); + if (StartC && !DivInt.urem(StepInt) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop(), SCEV::FlagAnyWrap)) { + const APInt &StartInt = StartC->getValue()->getValue(); + const APInt &StartRem = StartInt.urem(StepInt); + if (StartRem != 0) + LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, + AR->getLoop(), SCEV::FlagNW); + } + } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; @@ -2151,7 +2246,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, const Loop *L, SCEV::NoWrapFlags Flags) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); for (unsigned i = 1, e = Operands.size(); i != e; ++i) assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && "SCEVAddRecExpr operand types don't match!"); @@ -2269,7 +2364,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVSMaxExpr operand types don't match!"); @@ -2373,7 +2468,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG - const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVUMaxExpr operand types don't match!"); @@ -2476,7 +2571,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { // If we have TargetData, we can bypass creating a 
target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. @@ -2488,20 +2583,20 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { +const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, unsigned FieldNo) { // If we have TargetData, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. @@ -2514,17 +2609,17 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy, +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) C = Folded; - const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2558,14 +2653,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { /// the SCEV framework. This primarily includes integer types, and it /// can optionally include pointer types if the ScalarEvolution class /// has access to target-specific information. -bool ScalarEvolution::isSCEVable(const Type *Ty) const { +bool ScalarEvolution::isSCEVable(Type *Ty) const { // Integers and pointers are always SCEVable. return Ty->isIntegerTy() || Ty->isPointerTy(); } /// getTypeSizeInBits - Return the size in bits of the specified type, /// for which isSCEVable must return true. -uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { +uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); // If we have a TargetData, use it! @@ -2586,7 +2681,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { /// the given type and which represents how SCEV will treat the given /// type, for which isSCEVable must return true. For pointer types, /// this is the pointer-sized integer type. 
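getSizeOfExpr and getOffsetOfExpr above fold, when TargetData is available, to the same target constants that sizeof and offsetof expose for a host type. An illustrative struct (layout assumptions are noted in the comments and are not taken from the patch):

#include <cstddef>
#include <cstdio>

struct S {
  char C;   // offset 0
  int I;    // typically offset 4, after padding
  double D; // typically offset 8
};

int main() {
  std::printf("sizeof(S)      = %zu\n", sizeof(S));
  std::printf("offsetof(S, I) = %zu\n", offsetof(S, I));
  std::printf("offsetof(S, D) = %zu\n", offsetof(S, D));
  return 0;
}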
-const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { +Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); if (Ty->isIntegerTy()) @@ -2628,7 +2723,7 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { return getConstant( cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); return getMulExpr(V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); @@ -2640,7 +2735,7 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getConstant( cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); const SCEV *AllOnes = getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); @@ -2664,8 +2759,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, /// input value to the specified type. If the type must be extended, it is zero /// extended. const SCEV * -ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); @@ -2681,8 +2776,8 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { /// extended. const SCEV * ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, - const Type *Ty) { - const Type *SrcTy = V->getType(); + Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); @@ -2697,8 +2792,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, /// input value to the specified type. If the type must be extended, it is zero /// extended. The conversion must not be narrowing. const SCEV * -ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!"); @@ -2713,8 +2808,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { /// input value to the specified type. If the type must be extended, it is sign /// extended. The conversion must not be narrowing. const SCEV * -ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!"); @@ -2730,8 +2825,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { /// it is extended with unspecified bits. The conversion must not be /// narrowing. 
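The getTruncateOr*/getNoopOr* family above differs only in how widening fills the new bits: zero-extension fills with zeros, sign-extension replicates the sign bit. The distinction in two lines of integer arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  int8_t V = -5;                      // bit pattern 0xFB
  uint32_t Z = (uint32_t)(uint8_t)V;  // zero-extend: 0x000000FB = 251
  int32_t S = (int32_t)V;             // sign-extend: 0xFFFFFFFB = -5
  std::printf("zext = %u, sext = %d\n", Z, S);
  return 0;
}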
const SCEV * -ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!"); @@ -2745,8 +2840,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be widening. const SCEV * -ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { - const Type *SrcTy = V->getType(); +ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { + Type *SrcTy = V->getType(); assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!"); @@ -3032,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { // context. bool isInBounds = GEP->isInBounds(); - const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(Base->getType())->getElementType()->isSized()) @@ -3044,7 +3139,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { I != E; ++I) { Value *Index = *I; // Compute the (potentially symbolic) offset in bytes for this index. - if (const StructType *STy = dyn_cast<StructType>(*GTI++)) { + if (StructType *STy = dyn_cast<StructType>(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); @@ -3244,7 +3339,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { // TODO: non-affine addrec if (AddRec->isAffine()) { - const Type *Ty = AddRec->getType(); + Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { @@ -3396,7 +3491,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { // TODO: non-affine addrec if (AddRec->isAffine()) { - const Type *Ty = AddRec->getType(); + Type *Ty = AddRec->getType(); const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); if (!isa<SCEVCouldNotCompute>(MaxBECount) && getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { @@ -3503,7 +3598,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { AddOps.push_back(Op1); } AddOps.push_back(getSCEV(U->getOperand(0))); - return getAddExpr(AddOps); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V); + if (OBO->hasNoSignedWrap()) + setFlags(Flags, SCEV::FlagNSW); + if (OBO->hasNoUnsignedWrap()) + setFlags(Flags, SCEV::FlagNUW); + return getAddExpr(AddOps, Flags); } case Instruction::Mul: { // See the Add code above. 
@@ -3601,9 +3702,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { LCI->getValue() == CI->getValue()) if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) { - const Type *UTy = U->getType(); + Type *UTy = U->getType(); const SCEV *Z0 = Z->getOperand(); - const Type *Z0Ty = Z0->getType(); + Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); // If C is a low-bits mask, the zero extend is serving to @@ -3813,6 +3914,70 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // +/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a +/// normal unsigned value, if possible. Returns 0 if the trip count is unknown +/// or not constant. Will also return 0 if the maximum trip count is very large +/// (>= 2^32). +unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, + BasicBlock *ExitBlock) { + const SCEVConstant *ExitCount = + dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock)); + if (!ExitCount) + return 0; + + ConstantInt *ExitConst = ExitCount->getValue(); + + // Guard against huge trip counts. + if (ExitConst->getValue().getActiveBits() > 32) + return 0; + + // In case of integer overflow, this returns 0, which is correct. + return ((unsigned)ExitConst->getZExtValue()) + 1; +} + +/// getSmallConstantTripMultiple - Returns the largest constant divisor of the +/// trip count of this loop as a normal unsigned value, if possible. This +/// means that the actual trip count is always a multiple of the returned +/// value (don't forget the trip count could very well be zero as well!). +/// +/// Returns 1 if the trip count is unknown or not guaranteed to be the +/// multiple of a constant (which is also the case if the trip count is simply +/// constant, use getSmallConstantTripCount for that case). Will also return 1 +/// if the trip count is very large (>= 2^32). +unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, + BasicBlock *ExitBlock) { + const SCEV *ExitCount = getExitCount(L, ExitBlock); + if (ExitCount == getCouldNotCompute()) + return 1; + + // Get the trip count from the BE count by adding 1. + const SCEV *TCMul = getAddExpr(ExitCount, + getConstant(ExitCount->getType(), 1)); + // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt + // to factor simple cases. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) + TCMul = Mul->getOperand(0); + + const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul); + if (!MulC) + return 1; + + ConstantInt *Result = MulC->getValue(); + + // Guard against huge trip counts. + if (!Result || Result->getValue().getActiveBits() > 32) + return 1; + + return (unsigned)Result->getZExtValue(); +} + +// getExitCount - Get the expression for the number of loop iterations for which +// this loop is guaranteed not to exit via ExitingBlock. Otherwise return +// SCEVCouldNotCompute. +const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) { + return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); +} + /// getBackedgeTakenCount - If the specified loop has a predictable /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute /// object. The backedge-taken count is the number of times the loop header /// @@ -3825,14 +3990,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// hasLoopInvariantBackedgeTakenCount).
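getSmallConstantTripCount above turns a constant backedge-taken count into a trip count by adding one and refusing anything that does not fit in 32 bits. The arithmetic, sketched over plain integers (the real code works on SCEVConstant and APInt):

#include <cstdint>
#include <cstdio>

static unsigned smallTripCount(uint64_t BackedgeTaken) {
  if (BackedgeTaken >> 32) return 0;  // too large: treat as unknown
  // BackedgeTaken == 2^32 - 1 wraps to 0 here, which is the documented
  // "in case of integer overflow" behavior above.
  return (unsigned)BackedgeTaken + 1;
}

int main() {
  std::printf("%u\n", smallTripCount(9));             // body runs 10 times
  std::printf("%u\n", smallTripCount(0xFFFFFFFFull)); // wraps to 0: unknown
  return 0;
}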
/// const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).Exact; + return getBackedgeTakenInfo(L).getExact(this); } /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except /// return the least SCEV value that is known never to be less than the /// actual backedge taken count. const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { - return getBackedgeTakenInfo(L).Max; + return getBackedgeTakenInfo(L).getMax(this); } /// PushLoopPHIs - Push PHI nodes in the header of the given loop @@ -3849,33 +4014,31 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { - // Initially insert a CouldNotCompute for this loop. If the insertion + // Initially insert an invalid entry for this loop. If the insertion // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = - BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); + BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo())); if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo Result = getCouldNotCompute(); - BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); - if (Computed.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(Computed.Exact, L) && - isLoopInvariant(Computed.Max, L) && + // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result + // must be cleared in this scope. + BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + + if (Result.getExact(this) != getCouldNotCompute()) { + assert(isLoopInvariant(Result.getExact(this), L) && + isLoopInvariant(Result.getMax(this), L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; - - // Update the value in the map. - Result = Computed; - } else { - if (Computed.Max != getCouldNotCompute()) - // Update the value in the map. - Result = Computed; - if (isa<PHINode>(L->getHeader()->begin())) - // Only count loops that have phi nodes as not being computable. - ++NumTripCountsNotComputed; + } + else if (Result.getMax(this) == getCouldNotCompute() && + isa<PHINode>(L->getHeader()->begin())) { + // Only count loops that have phi nodes as not being computable. + ++NumTripCountsNotComputed; } // Now that we know more about the trip count for this loop, forget any @@ -3883,7 +4046,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (Computed.hasAnyInfo()) { + if (Result.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -3928,7 +4091,12 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { /// compute a trip count, or if the loop is deleted. void ScalarEvolution::forgetLoop(const Loop *L) { // Drop any stored trip count value. 
- BackedgeTakenCounts.erase(L); + DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos = + BackedgeTakenCounts.find(L); + if (BTCPos != BackedgeTakenCounts.end()) { + BTCPos->second.clear(); + BackedgeTakenCounts.erase(BTCPos); + } // Drop information about expressions based on loop-header PHIs. SmallVector<Instruction *, 16> Worklist; @@ -3984,6 +4152,85 @@ void ScalarEvolution::forgetValue(Value *V) { } } +/// getExact - Get the exact loop backedge taken count considering all loop +/// exits. If all exits are computable, this is the minimum computed count. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { + // If any exits were not computable, the loop is not computable. + if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); + + // We need at least one computable exit. + if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); + assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); + + const SCEV *BECount = 0; + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); + + if (!BECount) + BECount = ENT->ExactNotTaken; + else + BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + } + assert(BECount && "Invalid not taken count for loop exit"); + return BECount; +} + +/// getExact - Get the exact not taken count for this loop exit. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, + ScalarEvolution *SE) const { + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExitingBlock == ExitingBlock) + return ENT->ExactNotTaken; + } + return SE->getCouldNotCompute(); +} + +/// getMax - Get the max backedge taken count for the loop. +const SCEV * +ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { + return Max ? Max : SE->getCouldNotCompute(); +} + +/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each +/// computable exit into a persistent ExitNotTakenInfo array. +ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( + SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts, + bool Complete, const SCEV *MaxCount) : Max(MaxCount) { + + if (!Complete) + ExitNotTaken.setIncomplete(); + + unsigned NumExits = ExitCounts.size(); + if (NumExits == 0) return; + + ExitNotTaken.ExitingBlock = ExitCounts[0].first; + ExitNotTaken.ExactNotTaken = ExitCounts[0].second; + if (NumExits == 1) return; + + // Handle the rare case of multiple computable exits. + ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1]; + + ExitNotTakenInfo *PrevENT = &ExitNotTaken; + for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) { + PrevENT->setNextExit(ENT); + ENT->ExitingBlock = ExitCounts[i].first; + ENT->ExactNotTaken = ExitCounts[i].second; + } +} + +/// clear - Invalidate this result and free the ExitNotTakenInfo array. +void ScalarEvolution::BackedgeTakenInfo::clear() { + ExitNotTaken.ExitingBlock = 0; + ExitNotTaken.ExactNotTaken = 0; + delete[] ExitNotTaken.getNextExit(); +} + /// ComputeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo @@ -3992,38 +4239,31 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values. 
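The exit-combining rule implemented next can be modeled with plain integers: the loop's exact backedge-taken count is the unsigned minimum over its exits, and it is only known when every exit is computable. A sketch, with ~0 standing in for CouldNotCompute:

#include <cstdint>
#include <cstdio>
#include <vector>

static const uint64_t CouldNotCompute = ~0ull;

static uint64_t combineExits(const std::vector<uint64_t> &ExitCounts) {
  uint64_t BECount = CouldNotCompute;
  for (unsigned i = 0; i != ExitCounts.size(); ++i) {
    if (ExitCounts[i] == CouldNotCompute)
      return CouldNotCompute;          // one unknown exit spoils the total
    if (BECount == CouldNotCompute || ExitCounts[i] < BECount)
      BECount = ExitCounts[i];         // keep the unsigned minimum
  }
  return BECount;
}

int main() {
  std::vector<uint64_t> Exits = {100, 7, 42};
  std::printf("%llu\n", (unsigned long long)combineExits(Exits)); // 7
  Exits.push_back(CouldNotCompute);
  std::printf("known: %d\n", combineExits(Exits) != CouldNotCompute); // 0
  return 0;
}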
- const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - bool CouldNotComputeBECount = false; + bool CouldComputeBECount = true; + SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BackedgeTakenInfo NewBTI = - ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]); - - if (NewBTI.Exact == getCouldNotCompute()) { + ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. - CouldNotComputeBECount = true; - BECount = getCouldNotCompute(); - } else if (!CouldNotComputeBECount) { - if (BECount == getCouldNotCompute()) - BECount = NewBTI.Exact; - else - BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact); - } + CouldComputeBECount = false; + else + ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); + if (MaxBECount == getCouldNotCompute()) - MaxBECount = NewBTI.Max; - else if (NewBTI.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max); + MaxBECount = EL.Max; + else if (EL.Max != getCouldNotCompute()) + MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); } - return BackedgeTakenInfo(BECount, MaxBECount); + return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } -/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge -/// of the specified loop will execute if it exits via the specified block. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, - BasicBlock *ExitingBlock) { +/// ComputeExitLimit - Compute the number of times the backedge of the specified +/// loop will execute if it exits via the specified block. +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // Okay, we've chosen an exiting block. See what condition causes us to // exit at this block. @@ -4081,95 +4321,91 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, } // Proceed to the next level to examine the exit condition expression. - return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), - ExitBr->getSuccessor(0), - ExitBr->getSuccessor(1)); + return ComputeExitLimitFromCond(L, ExitBr->getCondition(), + ExitBr->getSuccessor(0), + ExitBr->getSuccessor(1)); } -/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the +/// ComputeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, - Value *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, + Value *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. 
- BackedgeTakenInfo BTI0 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); - BackedgeTakenInfo BTI1 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. - if (BTI0.Exact == getCouldNotCompute() || - BTI1.Exact == getCouldNotCompute()) + if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == getCouldNotCompute()) - MaxBECount = BTI1.Max; - else if (BTI1.Max == getCouldNotCompute()) - MaxBECount = BTI0.Max; + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; else - MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. assert(L->contains(FBB) && "Loop block has no successor in loop!"); - if (BTI0.Max == BTI1.Max) - MaxBECount = BTI0.Max; - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; } - return BackedgeTakenInfo(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. - BackedgeTakenInfo BTI0 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); - BackedgeTakenInfo BTI1 = - ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); + ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB); + ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. - if (BTI0.Exact == getCouldNotCompute() || - BTI1.Exact == getCouldNotCompute()) + if (EL0.Exact == getCouldNotCompute() || + EL1.Exact == getCouldNotCompute()) BECount = getCouldNotCompute(); else - BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact); - if (BTI0.Max == getCouldNotCompute()) - MaxBECount = BTI1.Max; - else if (BTI1.Max == getCouldNotCompute()) - MaxBECount = BTI0.Max; + BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact); + if (EL0.Max == getCouldNotCompute()) + MaxBECount = EL1.Max; + else if (EL1.Max == getCouldNotCompute()) + MaxBECount = EL0.Max; else - MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max); + MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. 
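The umin rule in the And case above matches direct simulation: when the loop continues only while both conditions hold, it leaves as soon as the shorter-lived condition fails. A two-line check:

#include <cstdio>

int main() {
  unsigned CountA = 10, CountB = 6; // iterations each condition stays true
  unsigned Trips = 0;
  while (Trips < CountA && Trips < CountB) // continue while A and B hold
    ++Trips;
  std::printf("backedge taken %u times, min(10, 6)\n", Trips); // 6
  return 0;
}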
assert(L->contains(TBB) && "Loop block has no successor in loop!"); - if (BTI0.Max == BTI1.Max) - MaxBECount = BTI0.Max; - if (BTI0.Exact == BTI1.Exact) - BECount = BTI0.Exact; + if (EL0.Max == EL1.Max) + MaxBECount = EL0.Max; + if (EL0.Exact == EL1.Exact) + BECount = EL0.Exact; } - return BackedgeTakenInfo(BECount, MaxBECount); + return ExitLimit(BECount, MaxBECount); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) - return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4185,17 +4421,17 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, } // If it's not an integer or pointer comparison then compute it the hard way. - return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } -/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the +/// ComputeExitLimitFromICmp - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, - ICmpInst *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, + ICmpInst *ExitCond, + BasicBlock *TBB, + BasicBlock *FBB) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4207,8 +4443,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - BackedgeTakenInfo ItCnt = - ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); + ExitLimit ItCnt = + ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); if (ItCnt.hasAnyInfo()) return ItCnt; } @@ -4247,36 +4483,36 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) - BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SLT: { - BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, true); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: { - BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), getNotSCEV(RHS), L, true); - if (BTI.hasAnyInfo()) return BTI; + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_ULT: { - 
BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false); - if (BTI.hasAnyInfo()) return BTI; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, false); + if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_UGT: { - BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS), + ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), getNotSCEV(RHS), L, false); - if (BTI.hasAnyInfo()) return BTI; + if (EL.hasAnyInfo()) return EL; break; } default: @@ -4290,8 +4526,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, #endif break; } - return - ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); + return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } static ConstantInt * @@ -4321,10 +4556,10 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, if (Idx >= CA->getNumOperands()) return 0; // Bogus program Init = cast<Constant>(CA->getOperand(Idx)); } else if (isa<ConstantAggregateZero>(Init)) { - if (const StructType *STy = dyn_cast<StructType>(Init->getType())) { + if (StructType *STy = dyn_cast<StructType>(Init->getType())) { assert(Idx < STy->getNumElements() && "Bad struct index!"); Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { + } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) { if (Idx >= ATy->getNumElements()) return 0; // Bogus program Init = Constant::getNullValue(ATy->getElementType()); } else { @@ -4338,15 +4573,16 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, return Init; } -/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of +/// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. -ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( - LoadInst *LI, - Constant *RHS, - const Loop *L, - ICmpInst::Predicate predicate) { +ScalarEvolution::ExitLimit +ScalarEvolution::ComputeLoadConstantCompareExitLimit( + LoadInst *LI, + Constant *RHS, + const Loop *L, + ICmpInst::Predicate predicate) { + if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. @@ -4431,69 +4667,117 @@ static bool CanConstantFold(const Instruction *I) { return false; } -/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node -/// in the loop that V is derived from. We allow arbitrary operations along the -/// way, but the operands of an operation must either be constants or a value -/// derived from a constant PHI. If this expression does not fit with these -/// constraints, return null. -static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { - // If this is not an instruction, or if this is an instruction outside of the - // loop, it can't be derived from a loop PHI. - Instruction *I = dyn_cast<Instruction>(V); - if (I == 0 || !L->contains(I)) return 0; +/// Determine whether this instruction can constant evolve within this loop +/// assuming its operands can all constant evolve. +static bool canConstantEvolve(Instruction *I, const Loop *L) { + // An instruction outside of the loop can't be derived from a loop PHI. 
+ if (!L->contains(I)) return false; - if (PHINode *PN = dyn_cast<PHINode>(I)) { + if (isa<PHINode>(I)) { if (L->getHeader() == I->getParent()) - return PN; + return true; else // We don't currently keep track of the control flow needed to evaluate // PHIs, so we cannot handle PHIs inside of loops. - return 0; + return false; } // If we won't be able to constant fold this expression even if the operands - // are constants, return early. - if (!CanConstantFold(I)) return 0; + // are constants, bail early. + return CanConstantFold(I); +} + +/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by +/// recursing through each instruction operand until reaching a loop header phi. +static PHINode * +getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, + DenseMap<Instruction *, PHINode *> &PHIMap) { // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = 0; - for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) - if (!isa<Constant>(I->getOperand(Op))) { - PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); - if (P == 0) return 0; // Not evolving from PHI - if (PHI == 0) - PHI = P; - else if (PHI != P) - return 0; // Evolving from multiple different PHIs. + for (Instruction::op_iterator OpI = UseInst->op_begin(), + OpE = UseInst->op_end(); OpI != OpE; ++OpI) { + + if (isa<Constant>(*OpI)) continue; + + Instruction *OpInst = dyn_cast<Instruction>(*OpI); + if (!OpInst || !canConstantEvolve(OpInst, L)) return 0; + + PHINode *P = dyn_cast<PHINode>(OpInst); + if (!P) + // If this operand is already visited, reuse the prior result. + // We may have P != PHI if this is the deepest point at which the + // inconsistent paths meet. + P = PHIMap.lookup(OpInst); + if (!P) { + // Recurse and memoize the results, whether a phi is found or not. + // This recursive call invalidates pointers into PHIMap. + P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap); + PHIMap[OpInst] = P; } - + if (P == 0) return 0; // Not evolving from PHI + if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs. + PHI = P; + } // This is an expression evolving from a constant PHI! return PHI; } +/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node +/// in the loop that V is derived from. We allow arbitrary operations along the +/// way, but the operands of an operation must either be constants or a value +/// derived from a constant PHI. If this expression does not fit with these +/// constraints, return null. +static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0 || !canConstantEvolve(I, L)) return 0; + + if (PHINode *PN = dyn_cast<PHINode>(I)) { + return PN; + } + + // Record non-constant instructions contained by the loop. + DenseMap<Instruction *, PHINode *> PHIMap; + return getConstantEvolvingPHIOperands(I, L, PHIMap); +} + /// EvaluateExpression - Given an expression that passes the /// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node /// in the loop has the value PHIVal. If we can't fold this expression for some /// reason, return null. -static Constant *EvaluateExpression(Value *V, Constant *PHIVal, +static Constant *EvaluateExpression(Value *V, const Loop *L, + DenseMap<Instruction *, Constant *> &Vals, const TargetData *TD) { - if (isa<PHINode>(V)) return PHIVal; + // Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C; + Instruction *I = cast<Instruction>(V); + if (Constant *C = Vals.lookup(I)) return C; + + assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant"); + assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop"); + (void)L; std::vector<Constant*> Operands(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); - if (Operands[i] == 0) return 0; + Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); + if (!Operand) { + Operands[i] = dyn_cast<Constant>(I->getOperand(i)); + if (!Operands[i]) return 0; + continue; + } + Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Vals[Operand] = C; + if (!C) return 0; + Operands[i] = C; } if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4514,6 +4798,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; + // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis. + DenseMap<Instruction *, Constant *> CurrentIterVals; + // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. @@ -4522,6 +4809,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge)); if (StartCST == 0) return RetVal = 0; // Must be a constant. + CurrentIterVals[PN] = StartCST; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); if (getConstantEvolvingPHI(BEValue, L) != PN && @@ -4534,29 +4822,31 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; - for (Constant *PHIVal = StartCST; ; ++IterationNum) { + for (; ; ++IterationNum) { if (IterationNum == NumIterations) - return RetVal = PHIVal; // Got exit value! + return RetVal = CurrentIterVals[PN]; // Got exit value! // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD); - if (NextPHI == PHIVal) + // EvaluateExpression adds non-phi values to the CurrentIterVals map. + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + if (NextPHI == CurrentIterVals[PN]) return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! - PHIVal = NextPHI; + DenseMap<Instruction *, Constant *> NextIterVals; + NextIterVals[PN] = NextPHI; + CurrentIterVals.swap(NextIterVals); } } -/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a +/// ComputeExitCountExhaustively - If the loop is known to execute a /// constant number of times (the condition evolves only from constants), /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return getCouldNotCompute(). 
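Before the implementation below, a toy model of this exhaustive evaluation: the exit condition depends only on a header PHI whose next value is computable from constants, so each iteration can be folded until the condition flips or an iteration budget runs out. The concrete values here are illustrative:

#include <cstdio>

int main() {
  unsigned PHIVal = 1;                // constant value entering the loop
  const unsigned MaxIterations = 100; // analysis budget, as in the real code
  for (unsigned IterationNum = 0; IterationNum != MaxIterations;
       ++IterationNum) {
    bool ExitCond = PHIVal > 40;      // models "icmp ugt %phi, 40"
    if (ExitCond) {
      std::printf("backedge taken %u times\n", IterationNum); // 4
      return 0;
    }
    PHIVal = PHIVal * 3 + 1;          // the PHI's evolution: 1, 4, 13, 40, 121
  }
  std::printf("could not compute\n");
  return 0;
}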
-/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// ComputeExitCountExhaustively - If the loop is known to execute a
 /// constant number of times (the condition evolves only from constants),
 /// try to evaluate a few iterations of the loop until the exit condition
 /// gets a value of ExitWhen (true or false).  If we cannot evaluate the
 /// trip count of the loop, return getCouldNotCompute().
-const SCEV *
-ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
-                                                       Value *Cond,
-                                                       bool ExitWhen) {
+const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+                                                           Value *Cond,
+                                                           bool ExitWhen) {
   PHINode *PN = getConstantEvolvingPHI(Cond, L);
   if (PN == 0) return getCouldNotCompute();
@@ -4583,8 +4873,10 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
   unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
   for (Constant *PHIVal = StartCST;
        IterationNum != MaxIterations; ++IterationNum) {
+    DenseMap<Instruction *, Constant *> PHIValMap;
+    PHIValMap[PN] = PHIVal;
     ConstantInt *CondVal =
-      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD));

     // Couldn't symbolically evaluate.
     if (!CondVal) return getCouldNotCompute();
@@ -4595,7 +4887,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
     }

     // Compute the value of the PHI node for the next iteration.
-    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD);
     if (NextPHI == 0 || NextPHI == PHIVal)
       return getCouldNotCompute();// Couldn't evaluate or not making progress...
     PHIVal = NextPHI;
@@ -4703,7 +4995,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
                                             Operands[0], Operands[1], TD);
         else
           C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                       &Operands[0], Operands.size(), TD);
+                                       Operands, TD);
         if (!C) return V;
         return getSCEV(C);
       }
@@ -4925,7 +5217,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     // Compute the two solutions for the quadratic formula.
     // The divisions must be performed as signed divisions.
     APInt NegB(-B);
-    APInt TwoA( A << 1 );
+    APInt TwoA(A << 1);
     if (TwoA.isMinValue()) {
       const SCEV *CNC = SE.getCouldNotCompute();
       return std::make_pair(CNC, CNC);
@@ -4940,7 +5232,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {

     return std::make_pair(SE.getConstant(Solution1),
                           SE.getConstant(Solution2));
-  } // end APIntOps namespace
+  } // end APIntOps namespace
 }

 /// HowFarToZero - Return the number of times a backedge comparing the specified
@@ -4950,7 +5242,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 /// now expressed as a single expression, V = x-y.  So the exit test is
 /// effectively V != 0.  We know and take advantage of the fact that this
 /// expression is only used in a comparison-with-zero context.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   // If the value is a constant
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
@@ -5034,8 +5326,19 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   // Handle unitary steps, which cannot wrap around.
   //   1*N = -Start; -1*N = Start (mod 2^BW), so:
   //   N = Distance (as unsigned)
-  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue())
-    return Distance;
+  if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+    ConstantRange CR = getUnsignedRange(Start);
+    const SCEV *MaxBECount;
+    if (!CountDown && CR.getUnsignedMin().isMinValue())
+      // When counting up, the worst starting value is 1, not 0.
+      MaxBECount = CR.getUnsignedMax().isMinValue()
+        ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+        : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+    else
+      MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+                                         : -CR.getUnsignedMin());
+    return ExitLimit(Distance, MaxBECount);
+  }

   // If the recurrence is known not to wrap around, unsigned divide computes the
   // back edge count.  We know that the value will either become zero (and thus
@@ -5062,7 +5365,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
 /// HowFarToNonZero - Return the number of times a backedge checking the
 /// specified value for nonzero will execute.  If not computable, return
 /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
   // Loops that look like: while (X == 0) are very strange indeed.  We don't
   // handle them yet except for the trivial case.  This could be expanded in the
@@ -5741,7 +6044,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
   assert(!isKnownNegative(Step) &&
          "This code doesn't handle negative strides yet!");

-  const Type *Ty = Start->getType();
+  Type *Ty = Start->getType();

   // When Start == End, we have an exact BECount == 0. Short-circuit this case
   // here because SCEV may not be able to determine that the unsigned division
@@ -5760,7 +6063,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
   if (!NoWrap) {
     // Check Add for unsigned overflow.
     // TODO: More sophisticated things could be done here.
-    const Type *WideTy = IntegerType::get(getContext(),
+    Type *WideTy = IntegerType::get(getContext(),
                                           getTypeSizeInBits(Ty) + 1);
     const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
     const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
@@ -5775,7 +6078,7 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
                                   const Loop *L, bool isSigned) {
   // Only handle:  "ADDREC < LoopInvariant".
@@ -5882,7 +6185,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     if (isa<SCEVCouldNotCompute>(MaxBECount))
       MaxBECount = BECount;

-    return BackedgeTakenInfo(BECount, MaxBECount);
+    return ExitLimit(BECount, MaxBECount);
   }

   return getCouldNotCompute();
@@ -6090,6 +6393,15 @@ void ScalarEvolution::releaseMemory() {
   FirstUnknown = 0;

   ValueExprMap.clear();
+
+  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+  // that a loop had multiple computable exits.
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); + I != E; ++I) { + I->second.clear(); + } + BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index befe6d2..47f0f32 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" @@ -26,7 +27,7 @@ using namespace llvm; /// reusing an existing cast if a suitable one exists, moving an existing /// cast if a suitable one exists but isn't in the right place, or /// creating a new one. -Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, +Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { // Check to see if there is already a cast! @@ -62,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. -Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { +Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); assert((Op == Instruction::BitCast || Op == Instruction::PtrToInt || @@ -103,7 +104,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { while ((isa<BitCastInst>(IP) && isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && cast<BitCastInst>(IP)->getOperand(0) != A) || - isa<DbgInfoIntrinsic>(IP)) + isa<DbgInfoIntrinsic>(IP) || + isa<LandingPadInst>(IP)) ++IP; return ReuseOrCreateCast(A, Ty, Op, IP); } @@ -113,7 +115,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast<InvokeInst>(I)) IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP; + while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) || + isa<LandingPadInst>(IP)) + ++IP; return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -160,7 +164,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } // If we haven't found this binop, insert it. - Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp")); + Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); BO->setDebugLoc(SaveInsertPt->getDebugLoc()); rememberInstruction(BO); @@ -277,7 +281,7 @@ static bool FactorOutConstant(const SCEV *&S, /// the list. /// static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { unsigned NumAddRecs = 0; for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i) @@ -306,7 +310,7 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, /// into GEP indices. /// static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, - const Type *Ty, + Type *Ty, ScalarEvolution &SE) { // Find the addrecs. 
SmallVector<const SCEV *, 8> AddRecs; @@ -365,10 +369,10 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, /// Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end, - const PointerType *PTy, - const Type *Ty, + PointerType *PTy, + Type *Ty, Value *V) { - const Type *ElTy = PTy->getElementType(); + Type *ElTy = PTy->getElementType(); SmallVector<Value *, 4> GepIndices; SmallVector<const SCEV *, 8> Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -423,7 +427,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, GepIndices.push_back(Scaled); // Collect struct field index operands. - while (const StructType *STy = dyn_cast<StructType>(ElTy)) { + while (StructType *STy = dyn_cast<StructType>(ElTy)) { bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; @@ -451,7 +455,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // appropriate struct type. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) { - const Type *CTy; + Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { GepIndices.push_back(FieldNo); @@ -474,7 +478,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } - if (const ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) + if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) ElTy = ATy->getElementType(); else break; @@ -494,7 +498,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + return ConstantExpr::getGetElementPtr(CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -572,8 +576,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); Value *GEP = Builder.CreateGEP(Casted, - GepIndices.begin(), - GepIndices.end(), + GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); @@ -691,7 +694,7 @@ public: } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); // Collect all the add operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal, and @@ -717,7 +720,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // This is the first operand. Just expand it. Sum = expand(Op); ++I; - } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { // The running sum expression is a pointer. Try to form a getelementptr // at this level with that as the base. SmallVector<const SCEV *, 4> NewOps; @@ -731,7 +734,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { NewOps.push_back(X); } Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); - } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { + } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { // The running sum is an integer, and there's a pointer at this level. // Try to form a getelementptr. If the running sum is instructions, // use a SCEVUnknown to avoid re-analyzing them. 
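The visitAddExpr hunks here keep pointer arithmetic in pointer form: when the running sum is pointer-typed, remaining integer addends are folded into GEP indices rather than round-tripped through ptrtoint/inttoptr. A standalone sketch of that idea in plain C++, with typed indexing standing in for an LLVM GEP (names are illustrative):

    #include <cstddef>
    #include <iostream>

    // Sketch: fold an integer addend into an indexed address computation,
    // the way expandAddToGEP folds SCEV addends into GEP indices.
    struct Elem { long A; long B; };

    Elem *addViaGEP(Elem *Base, std::ptrdiff_t Index) {
      return Base + Index;  // one typed index, no integer<->pointer casts
    }

    int main() {
      Elem Arr[4] = {};
      // Base + 3 elements lands on &Arr[3] without any ptrtoint/inttoptr.
      std::cout << (addViaGEP(Arr, 3) == &Arr[3]) << '\n';  // prints 1
    }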
@@ -762,7 +765,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); // Collect all the mul operands in a loop, along with their associated loops. // Iterate in reverse so that constants are emitted last, all else equal. @@ -804,7 +807,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { @@ -841,81 +844,141 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, } } +/// Determine if this is a well-behaved chain of instructions leading back to +/// the PHI. If so, it may be reused by expanded expressions. +bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || + (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) + return false; + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + if (IncV != PN) + return true; + + return isNormalAddRecExprPHI(PN, IncV, L); +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + switch (IncV->getOpcode()) { + // Check for a simple Add/Sub or GEP of a loop invariant step. + case Instruction::Add: + case Instruction::Sub: + return IncV->getOperand(0) == PN + && L->isLoopInvariant(IncV->getOperand(1)); + case Instruction::BitCast: + IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0)); + if (!IncV) + return false; + // fall-thru to GEP handling + case Instruction::GetElementPtr: { + // This must be a pointer addition of constants (pretty) or some number of + // address-size elements (ugly). + for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); + I != E; ++I) { + if (isa<Constant>(*I)) + continue; + // ugly geps have 2 operands. + // i1* is used by the expander to represent an address-size element. + if (IncV->getNumOperands() != 2) + return false; + unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); + if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) + && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) + return false; + // Ensure the operands dominate the insertion point. I don't know of a + // case when this would not be true, so this is somewhat untested. 
+ if (L == IVIncInsertLoop) { + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) + return false; + } + break; + } + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (IncV && IncV->getOpcode() == Instruction::BitCast) + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + return IncV == PN; + } + default: + return false; + } +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, - const Type *ExpandTy, - const Type *IntTy) { + Type *ExpandTy, + Type *IntTy) { assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == - SE.getEffectiveSCEVType(Normalized->getType())) && - SE.getSCEV(PN) == Normalized) - if (BasicBlock *LatchBlock = L->getLoopLatch()) { - Instruction *IncV = - cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); - - // Determine if this is a well-behaved chain of instructions leading - // back to the PHI. It probably will be, if we're scanning an inner - // loop already visited by LSR for example, but it wouldn't have - // to be. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (!SE.isSCEVable(PN->getType()) || + (SE.getEffectiveSCEVType(PN->getType()) != + SE.getEffectiveSCEVType(Normalized->getType())) || + SE.getSCEV(PN) != Normalized) + continue; + + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + if (LSRMode) { + if (!isExpandedAddRecExprPHI(PN, IncV, L)) + continue; + } + else { + if (!isNormalAddRecExprPHI(PN, IncV, L)) + continue; + } + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + rememberInstruction(IncV); + if (L == IVIncInsertLoop) do { - if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || - (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) { - IncV = 0; + if (SE.DT->dominates(IncV, IVIncInsertPos)) break; - } - // If any of the operands don't dominate the insert position, bail. - // Addrec operands are always loop-invariant, so this can only happen - // if there are instructions which haven't been hoisted. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) { - IncV = 0; - break; - } - } - if (!IncV) - break; - // Advance to the next instruction. - IncV = dyn_cast<Instruction>(IncV->getOperand(0)); - if (!IncV) - break; - if (IncV->mayHaveSideEffects()) { - IncV = 0; - break; - } + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); } while (IncV != PN); - - if (IncV) { - // Ok, the add recurrence looks usable. 
- // Remember this PHI, even in post-inc mode. - InsertedValues.insert(PN); - // Remember the increment. - IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); - rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast<Instruction>(IncV->getOperand(0)); - } while (IncV != PN); - return PN; - } - } + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -969,7 +1032,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Value *IncV; // If the PHI is a pointer, use a GEP, otherwise use an add or sub. if (isPointer) { - const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); // If the step isn't constant, don't use an implicitly scaled GEP, because // that would require a multiply inside the loop. if (!isa<ConstantInt>(StepV)) @@ -978,7 +1041,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp"); + IncV = Builder.CreateBitCast(IncV, PN->getType()); rememberInstruction(IncV); } } else { @@ -1001,8 +1064,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, } Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { - const Type *STy = S->getType(); - const Type *IntTy = SE.getEffectiveSCEVType(STy); + Type *STy = S->getType(); + Type *IntTy = SE.getEffectiveSCEVType(STy); const Loop *L = S->getLoop(); // Determine a normalized form of this expression, which is the expression @@ -1045,7 +1108,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. - const Type *ExpandTy = PostLoopScale ? IntTy : STy; + Type *ExpandTy = PostLoopScale ? IntTy : STy; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); // Accommodate post-inc mode, if necessary. @@ -1057,6 +1120,14 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "PostInc mode requires a unique loop latch!"); Result = PN->getIncomingValueForBlock(LatchBlock); + + // For an expansion to use the postinc form, the client must call + // expandCodeFor with an InsertPoint that is either outside the PostIncLoop + // or dominated by IVIncInsertPos. + assert((!isa<Instruction>(Result) || + SE.DT->dominates(cast<Instruction>(Result), + Builder.GetInsertPoint())) && + "postinc expansion does not dominate use"); } // Re-apply any non-loop-dominating scale. @@ -1069,7 +1140,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating offset. 
if (PostLoopOffset) { - if (const PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { + if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) { const SCEV *const OffsetArray[1] = { PostLoopOffset }; Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); } else { @@ -1086,7 +1157,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!CanonicalMode) return expandAddRecExprLiterally(S); - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. @@ -1110,7 +1181,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast<Instruction>(V))); - while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt)) + while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || + isa<LandingPadInst>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); @@ -1132,7 +1204,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Dig into the expression to find the pointer base for a GEP. ExposePointerBase(Base, RestArray[0], SE); // If we found a pointer, expand the AddRec with a GEP. - if (const PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { + if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { // Make sure the Base isn't something exotic, such as a multiplied // or divided pointer value. In those cases, the result type isn't // actually a pointer type. @@ -1216,35 +1288,35 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateTrunc(V, Ty, "tmp"); + Value *I = Builder.CreateTrunc(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateZExt(V, Ty, "tmp"); + Value *I = Builder.CreateZExt(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *V = expandCodeFor(S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType())); - Value *I = Builder.CreateSExt(V, Ty, "tmp"); + Value *I = Builder.CreateSExt(V, Ty); rememberInstruction(I); return I; } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. 
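The visitSMaxExpr and visitUMaxExpr hunks around this point lower an n-ary max by chaining one compare and one select per operand, right to left; the diff itself only drops the "tmp" value names. A minimal standalone sketch of that reduction, using plain signed integers where the expander emits icmp/select instructions:

    #include <iostream>
    #include <vector>

    // Sketch: reduce an n-ary signed max the way visitSMaxExpr emits it,
    // one "icmp sgt" + "select" pair at a time, right to left.
    long smax(const std::vector<long> &Ops) {
      long LHS = Ops.back();
      for (int i = (int)Ops.size() - 2; i >= 0; --i) {
        long RHS = Ops[i];
        bool ICmp = LHS > RHS;   // icmp sgt LHS, RHS
        LHS = ICmp ? LHS : RHS;  // select ICmp, LHS, RHS
      }
      return LHS;
    }

    int main() {
      std::cout << smax({3, -7, 42, 5}) << '\n';  // 42
    }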
@@ -1253,7 +1325,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); rememberInstruction(Sel); @@ -1268,7 +1340,7 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); - const Type *Ty = LHS->getType(); + Type *Ty = LHS->getType(); for (int i = S->getNumOperands()-2; i >= 0; --i) { // In the case of mixed integer and pointer types, do the // rest of the comparisons as integer. @@ -1277,7 +1349,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { LHS = InsertNoopCastOfTo(LHS, Ty); } Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); rememberInstruction(ICmp); Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); rememberInstruction(Sel); @@ -1290,7 +1362,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) { BasicBlock::iterator IP = I; while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) @@ -1299,7 +1371,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, return expandCodeFor(SH, Ty); } -Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { +Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { @@ -1325,7 +1397,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstNonPHI(); + InsertPt = L->getHeader()->getFirstInsertionPt(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; @@ -1346,8 +1418,12 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. - if (PostIncLoops.empty()) - InsertedExpressions[std::make_pair(S, InsertPt)] = V; + // + // This is independent of PostIncLoops. The mapped value simply materializes + // the expression at this insertion point. If the mapped value happened to be + // a postinc expansion, it could be reused by a non postinc user, but only if + // its insertion point was already at the head of the loop. + InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; @@ -1384,7 +1460,7 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { /// starts at zero and steps by one on each iteration. PHINode * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, - const Type *Ty) { + Type *Ty) { assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); // Build a SCEV for {0,+,1}<L>. 
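getOrInsertCanonicalInductionVariable, just above, builds the recurrence {0,+,1}<L>: a counter that starts at zero and steps by one per backedge taken. It is canonical because any affine recurrence can be expressed in terms of it. A small sketch of that rewrite, assuming a known trip count:

    #include <iostream>

    // Sketch: the canonical IV {0,+,1} starts at 0 and steps by 1; an affine
    // recurrence {Start,+,Step} is then Start + Step * CanonicalIV.
    int main() {
      const long Start = 7, Step = 3;
      for (unsigned CanonicalIV = 0; CanonicalIV != 4; ++CanonicalIV) {
        long IV = Start + Step * (long)CanonicalIV;  // {7,+,3} rewritten
        std::cout << IV << ' ';                      // 7 10 13 16
      }
      std::cout << '\n';
    }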
@@ -1401,3 +1477,102 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, return V; } + +/// hoistStep - Attempt to hoist an IV increment above a potential use. +/// +/// To successfully hoist, two criteria must be met: +/// - IncV operands dominate InsertPos and +/// - InsertPos dominates IncV +/// +/// Meeting the second condition means that we don't need to check all of IncV's +/// existing uses (it's moving up in the domtree). +/// +/// This does not yet recursively hoist the operands, although that would +/// not be difficult. +/// +/// This does not require a SCEVExpander instance and could be replaced by a +/// general code-insertion helper. +bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, + const DominatorTree *DT) { + if (DT->dominates(IncV, InsertPos)) + return true; + + if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (IncV->mayHaveSideEffects()) + return false; + + // Attempt to hoist IncV + for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); + OI != OE; ++OI) { + Instruction *OInst = dyn_cast<Instruction>(OI); + if (OInst && !DT->dominates(OInst, InsertPos)) + return false; + } + IncV->moveBefore(InsertPos); + return true; +} + +/// replaceCongruentIVs - Check for congruent phis in this loop header and +/// replace them with their most canonical representative. Return the number of +/// phis eliminated. +/// +/// This does not depend on any SCEVExpander state but should be used in +/// the same context that SCEVExpander is used. +unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakVH> &DeadInsts) { + unsigned NumElim = 0; + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE.isSCEVable(Phi->getType())) + continue; + + PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; + if (!OrigPhiRef) { + OrigPhiRef = Phi; + continue; + } + + // If one phi derives from the other via GEPs, types may differ. + // We could consider adding a bitcast here to handle it. + if (OrigPhiRef->getType() != Phi->getType()) + continue; + + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + + // If this phi is more canonical, swap it with the original. + if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) + && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + std::swap(OrigPhiRef, Phi); + std::swap(OrigInc, IsomorphicInc); + } + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's often the head of an IV user cycle that + // is isomorphic with the original phi. So it's worth eagerly cleaning up + // the common case of a single IV increment. 
+ if (OrigInc != IsomorphicInc && + OrigInc->getType() == IsomorphicInc->getType() && + SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && + hoistStep(OrigInc, IsomorphicInc, DT)) { + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + IsomorphicInc->replaceAllUsesWith(OrigInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElim; + Phi->replaceAllUsesWith(OrigPhiRef); + DeadInsts.push_back(Phi); + } + return NumElim; +} diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 60e630a..c66ecd6 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -60,20 +60,40 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, return true; } -const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT) { - if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) - return S; +namespace { + +/// Hold the state used during post-inc expression transformation, including a +/// map of transformed expressions. +class PostIncTransform { + TransformKind Kind; + PostIncLoopSet &Loops; + ScalarEvolution &SE; + DominatorTree &DT; + + DenseMap<const SCEV*, const SCEV*> Transformed; + +public: + PostIncTransform(TransformKind kind, PostIncLoopSet &loops, + ScalarEvolution &se, DominatorTree &dt): + Kind(kind), Loops(loops), SE(se), DT(dt) {} + + const SCEV *TransformSubExpr(const SCEV *S, Instruction *User, + Value *OperandValToReplace); + +protected: + const SCEV *TransformImpl(const SCEV *S, Instruction *User, + Value *OperandValToReplace); +}; + +} // namespace + +/// Implement post-inc transformation for all valid expression types. +const SCEV *PostIncTransform:: +TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { const SCEV *O = X->getOperand(); - const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, - Loops, SE, DT); + const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); if (O != N) switch (S->getSCEVType()) { case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); @@ -93,9 +113,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { - const SCEV *O = *I; - const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); - Operands.push_back(N); + Operands.push_back(TransformSubExpr(*I, LUser, 0)); } // Conservatively use AnyWrap until/unless we need FlagNW. const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); @@ -104,8 +122,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, case NormalizeAutodetect: if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { const SCEV *TransformedStep = - TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), - User, OperandValToReplace, Loops, SE, DT); + TransformSubExpr(AR->getStepRecurrence(SE), + User, OperandValToReplace); Result = SE.getMinusSCEV(Result, TransformedStep); Loops.insert(L); } @@ -114,24 +132,20 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, // sometimes fails to canonicalize two equal SCEVs to exactly the same // form. 
It's possibly a pessimization when this happens, but it isn't a
 // correctness problem, so disable this assert for now.
-    assert(S == TransformForPostIncUse(Denormalize, Result,
-                                       User, OperandValToReplace,
-                                       Loops, SE, DT) &&
+    assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
            "SCEV normalization is not invertible!");
 #endif
         break;
       case Normalize:
         if (Loops.count(L)) {
           const SCEV *TransformedStep =
-            TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
-                                   User, OperandValToReplace, Loops, SE, DT);
+            TransformSubExpr(AR->getStepRecurrence(SE),
+                             User, OperandValToReplace);
           Result = SE.getMinusSCEV(Result, TransformedStep);
         }
 #if 0
         // See the comment on the assert above.
-        assert(S == TransformForPostIncUse(Denormalize, Result,
-                                           User, OperandValToReplace,
-                                           Loops, SE, DT) &&
+        assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
                "SCEV normalization is not invertible!");
 #endif
         break;
@@ -150,8 +164,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
     for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
          I != E; ++I) {
       const SCEV *O = *I;
-      const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
-                                             Loops, SE, DT);
+      const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
       Changed |= N != O;
       Operands.push_back(N);
     }
@@ -170,10 +183,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
   if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
     const SCEV *LO = X->getLHS();
     const SCEV *RO = X->getRHS();
-    const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
-                                            Loops, SE, DT);
-    const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
-                                            Loops, SE, DT);
+    const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
+    const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
     if (LO != LN || RO != RN)
       return SE.getUDivExpr(LN, RN);
     return S;
@@ -182,3 +193,33 @@
   llvm_unreachable("Unexpected SCEV kind!");
   return 0;
 }
+
+/// Manage recursive transformation across an expression DAG. Revisiting
+/// expressions would lead to exponential recursion.
+const SCEV *PostIncTransform::
+TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
+
+  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+    return S;
+
+  const SCEV *Result = Transformed.lookup(S);
+  if (Result)
+    return Result;
+
+  Result = TransformImpl(S, User, OperandValToReplace);
+  Transformed[S] = Result;
+  return Result;
+}
+
+/// Top level driver for transforming an expression DAG into its requested
+/// post-inc form (either "Normalized" or "Denormalized").
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+                                         const SCEV *S,
+                                         Instruction *User,
+                                         Value *OperandValToReplace,
+                                         PostIncLoopSet &Loops,
+                                         ScalarEvolution &SE,
+                                         DominatorTree &DT) {
+  PostIncTransform Transform(Kind, Loops, SE, DT);
+  return Transform.TransformSubExpr(S, User, OperandValToReplace);
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 455c910..4d94f61 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -34,7 +34,7 @@ const unsigned MaxDepth = 6;
 /// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
 /// unknown returns 0).  For vector types, returns the element type's bitwidth.
-static unsigned getBitWidth(const Type *Ty, const TargetData *TD) { +static unsigned getBitWidth(Type *Ty, const TargetData *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; assert(isa<PointerType>(Ty) && "Expected a pointer type!"); @@ -103,7 +103,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { - const Type *ObjectType = GV->getType()->getElementType(); + Type *ObjectType = GV->getType()->getElementType(); // If the object is defined in the current Module, we'll be giving // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. @@ -268,7 +268,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { - const Type *SrcTy = I->getOperand(0)->getType(); + Type *SrcTy = I->getOperand(0)->getType(); unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint @@ -291,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::BitCast: { - const Type *SrcTy = I->getOperand(0)->getType(); + Type *SrcTy = I->getOperand(0)->getType(); if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) @@ -559,7 +559,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { Value *Index = I->getOperand(i); - if (const StructType *STy = dyn_cast<StructType>(*GTI)) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { // Handle struct member offset arithmetic. if (!TD) return; const StructLayout *SL = TD->getStructLayout(STy); @@ -569,7 +569,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, CountTrailingZeros_64(Offset)); } else { // Handle array index arithmetic. - const Type *IndexedTy = GTI.getIndexedType(); + Type *IndexedTy = GTI.getIndexedType(); if (!IndexedTy->isSized()) return; unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; @@ -898,7 +898,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); - const Type *Ty = V->getType(); + Type *Ty = V->getType(); unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) : Ty->getScalarSizeInBits(); unsigned Tmp, Tmp2; @@ -1078,7 +1078,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(Depth <= MaxDepth && "Limit Search Depth"); assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); - const Type *T = V->getType(); + Type *T = V->getType(); ConstantInt *CI = dyn_cast<ConstantInt>(V); @@ -1315,11 +1315,11 @@ Value *llvm::isBytewiseValue(Value *V) { // indices from Idxs that should be left out when inserting into the resulting // struct. To is the result struct built so far, new insertvalue instructions // build on that. 
-static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
                                 SmallVector<unsigned, 10> &Idxs,
                                 unsigned IdxSkip,
                                 Instruction *InsertBefore) {
-  const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+  llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
   if (STy) {
     // Save the original To argument so we can modify it
     Value *OrigTo = To;
@@ -1358,8 +1357,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
     return NULL;

   // Insert the value in the new (sub) aggregate
-  return llvm::InsertValueInst::Create(To, V,
-                                       ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+  return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
                                        "tmp", InsertBefore);
 }

@@ -1378,7 +1377,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                 Instruction *InsertBefore) {
   assert(InsertBefore && "Must have someplace to insert!");
-  const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
                                                              idx_range);
   Value *To = UndefValue::get(IndexedType);
   SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
@@ -1404,7 +1403,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
          && "Not looking at a struct or array?");
   assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
          && "Invalid indices for type?");
-  const CompositeType *PTy = cast<CompositeType>(V->getType());
+  CompositeType *PTy = cast<CompositeType>(V->getType());

   if (isa<UndefValue>(V))
     return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
@@ -1435,9 +1434,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
         // %C = insertvalue {i32, i32 } %A, i32 11, 1
         // which allows the unused 0,0 element from the nested struct to be
         // removed.
-        return BuildSubAggregate(V,
-                                 ArrayRef<unsigned>(idx_range.begin(),
-                                                    req_idx),
+        return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
                                  InsertBefore);
       else
         // We can't handle this without inserting insertvalues
@@ -1455,7 +1452,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
     // requested (though possibly only partially). Now we recursively look at
     // the inserted value, passing any remaining indices.
     return FindInsertedValue(I->getInsertedValueOperand(),
-                             ArrayRef<unsigned>(req_idx, idx_range.end()),
+                             makeArrayRef(req_idx, idx_range.end()),
                              InsertBefore);
   } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
     // If we're extracting a value from an aggregate that was extracted from
@@ -1506,7 +1503,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
     if (OpC->isZero()) continue;

     // Handle struct and array indices, which add their offset to the pointer.
-    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
       Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
     } else {
       uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
@@ -1557,8 +1554,8 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
       return false;

     // Make sure the index-ee is a pointer to array of i8.
-    const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
-    const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+    PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+    ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
     if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
       return false;
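One pattern worth calling out from the ScalarEvolutionNormalization change above: TransformSubExpr caches each transformed sub-expression in a map, so shared subtrees of the SCEV DAG are transformed once instead of once per path through the DAG. A standalone sketch of that memoization, with a toy Expr type standing in for const SCEV * (all names illustrative, not LLVM API):

    #include <iostream>
    #include <map>

    // Sketch: memoized recursion over a DAG, as in PostIncTransform's
    // Transformed map. Without the cache, a diamond-shaped DAG of depth n
    // costs about 2^n visits; with it, each node is visited once.
    struct Expr { const Expr *L = nullptr, *R = nullptr; };

    const Expr *transform(const Expr *S,
                          std::map<const Expr *, const Expr *> &Memo,
                          unsigned &Visits) {
      if (!S) return S;
      auto It = Memo.find(S);
      if (It != Memo.end()) return It->second;  // revisit: reuse prior result
      ++Visits;
      transform(S->L, Memo, Visits);
      transform(S->R, Memo, Visits);
      const Expr *Result = S;  // identity transform keeps the demo small
      Memo[S] = Result;
      return Result;
    }

    int main() {
      Expr Shared, Root;
      Root.L = &Shared;  // both operands of Root reach the same node
      Root.R = &Shared;
      std::map<const Expr *, const Expr *> Memo;
      unsigned Visits = 0;
      transform(&Root, Memo, Visits);
      std::cout << Visits << '\n';  // 2, not 3: Shared is transformed once
    }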