Diffstat (limited to 'lib/Transforms')
20 files changed, 560 insertions, 370 deletions
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4635d0e..1793bbf 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2493,29 +2493,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { Changed = true; } - // If the aliasee has internal linkage, give it the name and linkage - // of the alias, and delete the alias. This turns: - // define internal ... @f(...) - // @a = alias ... @f - // into: - // define ... @a(...) - if (!Target->hasLocalLinkage()) - continue; - - // The transform is only useful if the alias does not have internal linkage. - if (J->hasLocalLinkage()) - continue; + // If the alias is externally visible, we may still be able to simplify it. + if (!J->hasLocalLinkage()) { + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; - // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section - // and other attributes of the aliasee with those of the alias. - if (!hasOneUse) - continue; + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; - // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); - Target->setLinkage(J->getLinkage()); - Target->GlobalValue::copyAttributesFrom(J); + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + } // Delete the alias. 
M.getAliasList().erase(J); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 2951dbc..829da6b 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -15,6 +15,7 @@ #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#include "llvm/BasicBlock.h" #include "llvm/ADT/EquivalenceClasses.h" #include <vector> #include <algorithm> @@ -33,6 +34,18 @@ namespace llvm { typename MaximumSpanningTree<CT>::EdgeWeight Y) const { if (X.second > Y.second) return true; if (X.second < Y.second) return false; + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } return false; } }; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9ca90c3..e4c4ae5 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -21,7 +21,6 @@ #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" @@ -563,7 +562,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions have often have +/// OptimizeMemoryInst - Load and Store Instructions often have /// addressing modes that can do significant amounts of computation. As such, /// instruction selection will try to get the load or store to do as much /// computation as possible for the program. 
The problem is that isel can only diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index b0988b5..1cfde8f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -85,9 +85,14 @@ static bool doesClobberMemory(Instruction *I) { return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: - case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true; + default: + return false; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::init_trampoline: + case Intrinsic::lifetime_end: + return true; } } return false; @@ -111,14 +116,13 @@ static Value *getPointerOperand(Instruction *I) { return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getOperand(1); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getOperand(1); - case Intrinsic::lifetime_end: - return II->getOperand(2); + + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return I->getOperand(1); + case Intrinsic::lifetime_end: + return I->getOperand(2); } } @@ -135,15 +139,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { Len = MI->getLength(); } else { - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return -1u; - case Intrinsic::lifetime_end: - Len = II->getOperand(1); - break; + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return -1u; + case Intrinsic::lifetime_end: + Len = I->getOperand(1); + break; } } if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 6f1c32c..222792b 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,15 +31,18 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -187,8 +190,11 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; + +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// @@ -488,21 +494,21 @@ uint32_t 
ValueTable::lookup_or_add_call(CallInst* C) { // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + const NonLocalDepEntry *I = &deps[i]; // Ignore non-local dependencies. - if (I->second.isNonLocal()) + if (I->getResult().isNonLocal()) continue; // We don't handle non-depedencies. If we already have a call, reject // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { + if (I->getResult().isClobber() || cdep != 0) { cdep = 0; break; } - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ cdep = NonLocalDepCall; continue; } @@ -987,27 +993,27 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// AnalyzeLoadFromClobberingStore - This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. This returns -1 if we have to -/// give up, or a byte number in the stored value of the piece that feeds the -/// load. -static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, +/// AnalyzeLoadFromClobberingWrite - This function is called when we have a +/// memdep query of a load that ends up being a clobbering memory write (store, +/// memset, memcpy, memmove). This means that the write *may* provide bits used +/// by the load but we can't be sure because the pointers don't mustalias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. 
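The doc comment above describes the core of the forwarding check: decompose both pointers into a common base plus constant byte offsets, and give up unless the write fully covers the load. A minimal standalone sketch of that arithmetic (illustrative helper, not the LLVM implementation; sizes and offsets in bytes):

#include <cstdint>

// Returns -1 if the stored bytes cannot supply the loaded bytes, otherwise
// the byte offset of the load within the stored value (LoadOffset-StoreOffset).
int64_t ComputeForwardingOffset(int64_t StoreOffset, uint64_t StoreSizeBytes,
                                int64_t LoadOffset, uint64_t LoadSizeBytes) {
  // The load must start at or after the start of the store...
  if (LoadOffset < StoreOffset)
    return -1;
  // ...and end at or before the end of the store, or some bits are missing.
  if (uint64_t(LoadOffset - StoreOffset) + LoadSizeBytes > StoreSizeBytes)
    return -1;
  return LoadOffset - StoreOffset;
}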
- if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) || - isa<StructType>(DepSI->getOperand(0)->getType()) || - isa<ArrayType>(DepSI->getOperand(0)->getType())) + if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy)) return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = - GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); Value *LoadBase = - GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; @@ -1018,12 +1024,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1033,12 +1037,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, // must have gotten confused. // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then // remove this check, as it is duplicated with what we have below. - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); - uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); - if ((StoreSize & 7) | (LoadSize & 7)) + if ((WriteSizeInBits & 7) | (LoadSize & 7)) return -1; - StoreSize >>= 3; // Convert to bytes. + uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. LoadSize >>= 3; @@ -1052,12 +1055,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE LOAD DEP WITH COMMON BASE:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1075,6 +1076,66 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, return LoadOffset-StoreOffset; } +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, + const TargetData &TD) { + // Cannot handle reading from store of first-class aggregate yet. 
+ if (isa<StructType>(DepSI->getOperand(0)->getType()) || + isa<ArrayType>(DepSI->getOperand(0)->getType())) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + StorePtr, StoreSize, TD); +} + +static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, + const TargetData &TD) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (SizeCst == 0) return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (MI->getIntrinsicID() == Intrinsic::memset) + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, TD); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast<MemTransferInst>(MI); + + Constant *Src = dyn_cast<Constant>(MTI->getSource()); + if (Src == 0) return -1; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject()); + if (GV == 0 || !GV->isConstant()) return -1; + + // See if the access is within the bounds of the transfer. + int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + MI->getDest(), MemSizeInBits, TD); + if (Offset == -1) + return Offset; + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + if (ConstantFoldLoadFromConstPtr(Src, &TD)) + return Offset; + return -1; +} + /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means @@ -1089,50 +1150,134 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (isa<PointerType>(SrcVal->getType())) - SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); if (!isa<IntegerType>(SrcVal->getType())) - SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp"); // Shift the bits to the least significant depending on endianness. 
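The shift amount computed just below selects which bytes of the stored integer the load actually reads. A standalone sketch of the same logic, assuming the stored value has already been widened to an integer and everything fits in 8 bytes (helper name is illustrative):

#include <cstdint>

// Extract LoadSize bytes at byte Offset from a StoreSize-byte stored value.
uint64_t ExtractStoredBytes(uint64_t StoredVal, unsigned StoreSize,
                            unsigned LoadSize, unsigned Offset,
                            bool LittleEndian) {
  // Little-endian: byte Offset is bit Offset*8 from the LSB. Big-endian:
  // the same byte sits (StoreSize - LoadSize - Offset)*8 bits up.
  unsigned ShiftAmt = LittleEndian ? Offset * 8
                                   : (StoreSize - LoadSize - Offset) * 8;
  uint64_t V = StoredVal >> ShiftAmt;
  if (LoadSize < 8)                     // Truncate, as the IR trunc does.
    V &= (uint64_t(1) << (LoadSize * 8)) - 1;
  return V;
}

For example, with a 4-byte store of 0x11223344 on a little-endian target, loading 1 byte at offset 1 shifts right by 8 and masks, yielding 0x33, which matches the byte layout in memory.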
unsigned ShiftAmt; - if (TD.isLittleEndian()) { + if (TD.isLittleEndian()) ShiftAmt = Offset*8; - } else { + else ShiftAmt = (StoreSize-LoadSize-Offset)*8; - } if (ShiftAmt) - SrcVal = BinaryOperator::CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); if (LoadSize != StoreSize) - SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp"); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetMemInstValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + const TargetData &TD){ + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); + + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { + // If we can double the number of bytes set, do it. + if (NumBytesSet*2 <= LoadSize) { + Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = Builder.CreateShl(Val, 1*8); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + return ConstantFoldLoadFromConstPtr(Src, &TD); +} + + + struct AvailableValueInBlock { /// BB - The basic block in question. BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + MemIntrin // A memory intrinsic which is loaded from. + }; + /// V - The value that is live out of the block. - Value *V; - /// Offset - The byte offset in V that is interesting for the load query. + PointerIntPair<Value *, 1, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
unsigned Offset; static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset = 0) { AvailableValueInBlock Res; Res.BB = BB; - Res.V = V; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); Res.Offset = Offset; return Res; } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(!isSimpleValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } }; /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, @@ -1149,30 +1294,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, const Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - BasicBlock *BB = ValuesPerBlock[i].BB; - Value *AvailableVal = ValuesPerBlock[i].V; - unsigned Offset = ValuesPerBlock[i].Offset; + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + BasicBlock *BB = AV.BB; if (SSAUpdate.HasValueForBlock(BB)) continue; - - if (AvailableVal->getType() != LoadTy) { - assert(TD && "Need target data to handle type mismatch case"); - AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, - BB->getTerminator(), *TD); - - if (Offset) { - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + + unsigned Offset = AV.Offset; + + Value *AvailableVal; + if (AV.isSimpleValue()) { + AvailableVal = AV.getSimpleValue(); + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *AV.getSimpleValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - - - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + } else { + AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *AV.getMemIntrinValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - SSAUpdate.AddAvailableValue(BB, AvailableVal); } @@ -1187,12 +1335,18 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +static bool isLifetimeStart(Instruction *Inst) { + if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; + return false; +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. - SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; + SmallVector<NonLocalDepEntry, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " @@ -1206,11 +1360,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. 
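The splat loop in GetMemInstValueForLoad above widens the single memset byte to the load's width by repeated doubling. A standalone sketch of that loop, assuming a load of at most 8 bytes (function name is illustrative):

#include <cstdint>

uint64_t SplatByte(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte, OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
    if (NumBytesSet * 2 <= LoadSize) {  // Double the bytes already set.
      Val |= Val << (NumBytesSet * 8);
      NumBytesSet <<= 1;
      continue;
    }
    Val = OneElt | (Val << 8);          // Otherwise add one byte at a time.
    ++NumBytesSet;
  }
  return Val;  // e.g. SplatByte(0xAB, 4) == 0xABABABAB
}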
- if (Deps.size() == 1 && Deps[0].second.isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( errs() << "GVN: non-local load "; WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; + errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } @@ -1225,18 +1379,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI, const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { - BasicBlock *DepBB = Deps[i].first; - MemDepResult DepInfo = Deps[i].second; + BasicBlock *DepBB = Deps[i].getBB(); + MemDepResult DepInfo = Deps[i].getResult(); if (DepInfo.isClobber()) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + Value *Address = Deps[i].getAddress(); + // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { if (TD == 0) TD = getAnalysisIfAvailable<TargetData>(); - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, + DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepSI->getOperand(0), @@ -1245,8 +1405,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // If the clobbering value is a memset/memcpy/memmove, see if we can + // forward a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, + DepMI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, + Offset)); + continue; + } + } + } - // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } @@ -1254,21 +1429,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || + // Loading immediately after lifetime begin -> undef. + isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, UndefValue::get(LI->getType()))); continue; } - // Loading immediately after lifetime begin or end -> undef. - if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - UndefValue::get(LI->getType()))); - } - } - if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1378,19 +1546,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. 
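The AvailableValueInBlock struct above stores its SimpleVal/MemIntrin discriminator in the low bit of the value pointer via PointerIntPair. A standalone sketch of that low-bit tagging trick, assuming pointers aligned to at least 2 bytes (class name is illustrative, not the LLVM ADT):

#include <cassert>
#include <cstdint>

class TaggedPtr {
  uintptr_t Bits;
public:
  TaggedPtr(void *P, bool Tag) {
    // Alignment guarantees the low bit of the pointer is free for the tag.
    assert((reinterpret_cast<uintptr_t>(P) & 1) == 0 && "pointer unaligned");
    Bits = reinterpret_cast<uintptr_t>(P) | uintptr_t(Tag);
  }
  void *getPointer() const {
    return reinterpret_cast<void *>(Bits & ~uintptr_t(1));
  }
  bool getInt() const { return (Bits & 1) != 0; }
};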
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].V == LI) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) return false; + // FIXME: It is extremely unclear what this loop is doing, other than + // artificially restricting loadpre. if (isSinglePred) { bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V)) + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + if (AV.isSimpleValue()) // "Hot" Instruction is in some loop (because it dominates its dep. // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1435,29 +1609,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. // - // FIXME: This may insert a computation, but we don't tell scalar GVN - // optimization stuff about it. How do we do this? SmallVector<Instruction*, 8> NewInsts; - Value *LoadPtr = 0; // If all preds have a single successor, then we know it is safe to insert the // load on the pred (?!?), so we can insert code to materialize the pointer if // it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; if (allSingleSucc) { - LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT,NewInsts); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); } else { - LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT); - } + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; + } + // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (LoadPtr == 0) { + assert(NewInsts.empty() && "Shouldn't insert insts on failure"); DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getOperand(0) << "\n"); return false; } + + // Assign value numbers to these new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } // Make sure it is valid to move this load here. We have to watch out for: // @1 = getelementptr (i8* p, ... @@ -1517,11 +1705,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If the value isn't available, don't do anything! if (Dep.isClobber()) { - // FIXME: We should handle memset/memcpy/memmove as dependent instructions - // to forward the value if available. 
- //if (isa<MemIntrinsic>(Dep.getInst())) - //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; - // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1532,25 +1715,42 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. + Value *AvailVal = 0; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { - int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); - if (Offset != -1) { - Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, - L->getType(), L, *TD); - DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' - << *AvailVal << '\n' << *L << "\n\n\n"); - - // Replace the load! - L->replaceAllUsesWith(AvailVal); - if (isa<PointerType>(AvailVal->getType())) - MD->invalidateCachedPointerInfo(AvailVal); - toErase.push_back(L); - NumGVNLoad++; - return true; - } + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); } + // If the clobbering value is a memset/memcpy/memmove, see if we can forward + // a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { + if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); + } + } + + if (AvailVal) { + DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa<PointerType>(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow errs() << "GVN: load "; @@ -1635,11 +1835,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { return true; } - // If this load occurs either right after a lifetime begin or a lifetime end, + // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index d12ad81..b9c536f 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -8585,25 +8585,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); - if (BitWidth > 1) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); - - if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 && - KnownZeroRHS.countLeadingOnes() == BitWidth-1) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { if (!DoXform) return ICI; - Value *Xor = Builder->CreateXor(LHS, RHS); + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1)); - Xor->takeName(ICI); - return ReplaceInstUsesWith(CI, Xor); + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); } } } @@ -11189,8 +11200,9 @@ namespace llvm { return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width; } - static bool isPod() { return true; } }; + template <> + struct isPodLike<LoweredPHIRecord> { static const bool value = true; }; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1b93f34..d58b9c9 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -718,6 +718,11 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredSI->getSuccessor(PredCase) != DestBB && DestSI->getSuccessor(i) != DestBB) continue; + + // Do not forward this if it already goes to this destination, this would + // be an infinite loop. + if (PredSI->getSuccessor(PredCase) == DestSucc) + continue; // Otherwise, we're safe to make the change. 
Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 5511387..42a8fdc 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -160,16 +160,17 @@ namespace { // Because the exit block is not in the loop, we know we have to get _at // least_ its immediate dominator. - do { - // Get next Immediate Dominator. - IDom = IDom->getIDom(); - + IDom = IDom->getIDom(); + + while (IDom && IDom != BlockInLoopNode) { // If we have got to the header of the loop, then the instructions block // did not dominate the exit node, so we can't hoist it. if (IDom->getBlock() == LoopHeader) return false; - } while (IDom != BlockInLoopNode); + // Get next Immediate Dominator. + IDom = IDom->getIDom(); + }; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 564c7ac..85cc712 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,18 +24,14 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -85,8 +81,6 @@ namespace { class LoopStrengthReduce : public LoopPass { IVUsers *IU; - LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -94,10 +88,6 @@ namespace { /// particular stride. std::map<const SCEV *, IVsOfOneStride> IVsByStride; - /// StrideNoReuse - Keep track of all the strides whose ivs cannot be - /// reused (nor should they be rewritten to reuse other strides). - SmallSet<const SCEV *, 4> StrideNoReuse; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; @@ -109,8 +99,7 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) { - } + LoopPass(&ID), TLI(tli) {} bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -118,13 +107,11 @@ namespace { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. 
AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominanceFrontier>(); - AU.addPreserved<DominatorTree>(); + AU.addPreserved("loops"); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); AU.addRequired<IVUsers>(); @@ -228,19 +215,17 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); - DeadInsts.pop_back(); + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } - } I->eraseFromParent(); Changed = true; @@ -265,7 +250,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -338,9 +323,6 @@ namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { - /// SE - The current ScalarEvolution object. - ScalarEvolution *SE; - /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -372,9 +354,9 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(se->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -383,14 +365,14 @@ namespace { void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI); + Instruction *IP, + ScalarEvolution *SE); void dump() const; }; } @@ -404,27 +386,12 @@ void BasedUser::dump() const { Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI) { - // Figure out where we *really* want to insert this code. In particular, if - // the user is inside of a loop that is nested inside of L, we really don't - // want to insert this expression before the user, we'd rather pull it out as - // many loops as possible. - Instruction *BaseInsertPt = IP; - - // Figure out the most-nested loop that IP is in. 
- Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - - // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out - // the preheader of the outer-most loop where NewBase is not loop invariant. - if (L->contains(IP->getParent())) - while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { - BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); - InsertLoop = InsertLoop->getParentLoop(); - } - - Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + Instruction *IP, + ScalarEvolution *SE) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -443,8 +410,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE) { if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -473,7 +440,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -535,7 +502,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -1011,17 +978,13 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { - if (StrideNoReuse.count(Stride)) - return SE->getIntegerSCEV(0, Stride->getType()); - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || - StrideNoReuse.count(SI->first)) + if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = @@ -1780,8 +1743,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, *LI, - DeadInsts); + Rewriter, L, this, + DeadInsts, SE); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. 
@@ -2745,8 +2708,6 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); - LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; @@ -2792,15 +2753,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - } - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); - StrideNoReuse.clear(); + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + } // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 38d267a..b7adfdc 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -404,12 +404,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); - Function *F = loopHeader->getParent(); // If LoopSimplify was unable to form a preheader, don't do any unswitching. if (!loopPreheader) return false; + Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for // this case. if (!IsTrivialUnswitchCondition(LoopCond)) { diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index 001267a..dbc82e1 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -19,7 +19,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" @@ -155,8 +154,10 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ae6ad74..b040a27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -105,7 +105,7 @@ namespace { void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info); void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); + AllocaInfo &Info); void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, unsigned OpNo, AllocaInfo &Info); void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, @@ -362,7 +362,6 @@ void SROA::DoScalarReplacement(AllocaInst *AI, // Now that we have created the alloca instructions that we want to use, // expand the getelementptr instructions to use them. 
- // while (!AI->use_empty()) { Instruction *User = cast<Instruction>(AI->use_back()); if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { @@ -450,11 +449,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, NumReplaced++; } - /// isSafeElementUse - Check to see if this use is an allowed use for a /// getelementptr instruction of an array aggregate allocation. isFirstElt /// indicates whether Ptr is known to the start of the aggregate. -/// void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -503,7 +500,6 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, } } - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); if (Info.isUnsafe) return; break; @@ -543,9 +539,8 @@ static bool AllUsersAreLoads(Value *Ptr) { return true; } -/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an +/// isSafeUseOfAllocation - Check if this user is an allowed use for an /// aggregate allocation. -/// void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info) { if (BitCastInst *C = dyn_cast<BitCastInst>(User)) @@ -614,7 +609,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. + // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { @@ -628,7 +623,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory /// intrinsic can be promoted by SROA. At this point, we know that the operand /// of the memintrinsic is a pointer to the beginning of the allocation. void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, @@ -656,8 +651,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, } } -/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast -/// are +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); @@ -773,6 +768,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, OtherPtr = MTI->getRawDest(); } } + + // Keep track of the other intrinsic argument, so it can be removed if it + // is dead when the intrinsic is replaced. + Value *PossiblyDead = OtherPtr; // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. 
@@ -926,9 +925,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } } MI->eraseFromParent(); + if (PossiblyDead) + RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); } -/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that /// overwrites the entire allocation. Extract out the pieces of the stored /// integer and store them individually. void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -1052,7 +1053,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SI->eraseFromParent(); } -/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to /// an integer. Load the individual pieces to form the aggregate value. void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { @@ -1186,7 +1187,6 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, /// or 1 if safe after canonicalization has been performed. -/// int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. @@ -1215,7 +1215,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return Info.needsCleanup ? 1 : 3; } -/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP +/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP /// is canonicalized here. void SROA::CleanupGEP(GetElementPtrInst *GEPI) { gep_type_iterator I = gep_type_begin(GEPI); @@ -1347,7 +1347,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to use a to single vector type, return true, and set VecTy to +/// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable /// integer, return true but set VecTy to VoidTy. Further, if the use is not a /// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset @@ -1355,7 +1355,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, /// /// If we see at least one access to the value that is as a vector type, set the /// SawVec flag. -/// bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, bool &SawVec, uint64_t Offset, unsigned AllocaSize) { @@ -1438,7 +1437,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return true; } - /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca /// directly. This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a @@ -1481,7 +1479,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); // FIXME: Remove once builder has Twine API. 
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); @@ -1506,7 +1505,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { APVal |= APVal << 8; // FIXME: Remove once builder has Twine API. - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); @@ -1679,7 +1679,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return FromVal; } - /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer /// or vector value "Old" at the offset specified by Offset. /// diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 13077fe..5acd6aa 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -88,7 +88,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, if (!isa<ReturnInst>(Body->getTerminator())) break; - Instruction *NextInst = next(BasicBlock::iterator(Call)); + Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); // Inline the call, taking care of what code ends up where. NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index f9b929c..6fd884b 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -128,8 +128,7 @@ public: /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return - B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); + return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -157,27 +156,25 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B) { Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); - return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, + const Type *Ty = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + return B.CreateCall4(MemCpy, Dst, Src, Len, ConstantInt::get(Type::getInt32Ty(*Context), Align)); } -/// EmitMemMOve - Emit a call to the memmove function to the builder. This +/// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. 
 Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
                                         unsigned Align, IRBuilder<> &B) {
   Module *M = Caller->getParent();
-  Intrinsic::ID IID = Intrinsic::memmove;
-  const Type *Tys[1];
-  Tys[0] = TD->getIntPtrType(*Context);
-  Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
-  Value *D = CastToCStr(Dst, B);
-  Value *S = CastToCStr(Src, B);
+  const Type *Ty = TD->getIntPtrType(*Context);
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
   Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
-  return B.CreateCall4(MemMove, D, S, Len, A);
+  return B.CreateCall4(MemMove, Dst, Src, Len, A);
 }
 
 /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
@@ -2647,10 +2644,11 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
 //   * strcspn("",a) -> 0
 //   * strcspn(s,"") -> strlen(a)
 //
-// strstr:
+// strstr: (PR5783)
 //   * strstr(x,x) -> x
-//   * strstr(s1,s2) -> offset_of_s2_in(s1)
-//       (if s1 and s2 are constant strings)
+//   * strstr(x, "") -> x
+//   * strstr(x, "a") -> strchr(x, 'a')
+//   * strstr(s1,s2) -> result (if s1 and s2 are constant strings)
 //
 // tan, tanf, tanl:
 //   * tan(atan(x)) -> x
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2974592..2962e84 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index aef0f5f..7a37aa3 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -603,3 +603,65 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
   return true;
 }
 
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+  bool Changed = false;
+
+  // This implementation doesn't currently consider undef operands
+  // specially. Theoretically, two phis which are identical except for
+  // one having an undef where the other doesn't could be collapsed.
+
+  // Map from PHI hash values to PHI nodes. If multiple PHIs have
+  // the same hash value, the element is the first PHI in the
+  // linked list in CollisionMap.
+  DenseMap<uintptr_t, PHINode *> HashMap;
+
+  // Maintain linked lists of PHI nodes with common hash values.
+  DenseMap<PHINode *, PHINode *> CollisionMap;
+
+  // Examine each PHI.
+  for (BasicBlock::iterator I = BB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I++); ) {
+    // Compute a hash value on the operands. Instcombine will likely have sorted
+    // them, which helps expose duplicates, but we have to check all the
+    // operands to be safe in case instcombine hasn't run.
+    uintptr_t Hash = 0;
+    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+      // This hash algorithm is quite weak as hash functions go, but it seems
+      // to do a good enough job for this particular purpose, and is very quick.
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
+    // If we've never seen this hash value before, it's a unique PHI.
+    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+      HashMap.insert(std::make_pair(Hash, PN));
+    if (Pair.second) continue;
+    // Otherwise it's either a duplicate or a hash collision.
+    for (PHINode *OtherPN = Pair.first->second; ; ) {
+      if (OtherPN->isIdenticalTo(PN)) {
+        // A duplicate. Replace this PHI with its duplicate.
+        PN->replaceAllUsesWith(OtherPN);
+        PN->eraseFromParent();
+        Changed = true;
+        break;
+      }
+      // A non-duplicate hash collision.
+      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+      if (I == CollisionMap.end()) {
+        // Set this PHI to be the head of the linked list of colliding PHIs.
+        PHINode *Old = Pair.first->second;
+        Pair.first->second = PN;
+        CollisionMap[PN] = Old;
+        break;
+      }
+      // Proceed to the next PHI in the list.
+      OtherPN = I->second;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 8c18b59..743bb6e 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -244,7 +244,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
 
   // Merge case into clusters
   if (Cases.size()>=2)
-    for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) {
+    for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
       int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
       int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
       BasicBlock* nextBB = J->BB;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e25f9e2..846e432 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -55,7 +55,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
   static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
 }
 
@@ -102,7 +101,7 @@ namespace {
 public:
   typedef std::vector<Value *> ValVector;
 
-  RenamePassData() {}
+  RenamePassData() : BB(NULL), Pred(NULL), Values() {}
   RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
     : BB(B), Pred(P), Values(V) {}
   BasicBlock *BB;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8a07c35..ba41bf9 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -295,10 +295,14 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       InsertedVal = SingularValue;
     }
 
+    // Either path through the 'if' should have set InsertedVal -> SingularVal.
+    assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) &&
+           "RAUW didn't change InsertedVal to be SingularVal");
+
     // Drop the entries we added in IncomingPredInfo to restore the stack.
     IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                            IncomingPredInfo.end());
-    return InsertedVal;
+    return SingularValue;
   }
 
   // Otherwise, we do need a PHI: insert one now if we don't already have one.
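
The duplicate-PHI elimination added to Local.cpp above keys its DenseMap on a rotate-and-XOR hash of the PHI's operand pointers. The following is a minimal standalone sketch of that mixing step lifted out of the LLVM types; the name hashPointerOperands and the plain void* operand list are illustrative assumptions, not part of this patch:

#include <limits.h>
#include <stdint.h>
#include <vector>

// XOR each operand pointer into the accumulator, then rotate left by
// 7 bits so that the same operands in a different order usually hash
// differently, mirroring the loop in EliminateDuplicatePHINodes.
static uintptr_t hashPointerOperands(const std::vector<const void*> &Ops) {
  uintptr_t Hash = 0;
  for (size_t i = 0, e = Ops.size(); i != e; ++i) {
    Hash ^= reinterpret_cast<uintptr_t>(Ops[i]);
    Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
  }
  return Hash;
}

Identical PHIs necessarily land in the same bucket, so the comparatively expensive isIdenticalTo check only runs on hash-equal candidates, and the CollisionMap linked list separates true duplicates from accidental collisions.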
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 89b0bd9..d7ca45e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1589,69 +1589,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   return true;
 }
 
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
-///
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
-  bool Changed = false;
-
-  // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
-  // one having an undef where the other doesn't could be collapsed.
-
-  // Map from PHI hash values to PHI nodes. If multiple PHIs have
-  // the same hash value, the element is the first PHI in the
-  // linked list in CollisionMap.
-  DenseMap<uintptr_t, PHINode *> HashMap;
-
-  // Maintain linked lists of PHI nodes with common hash values.
-  DenseMap<PHINode *, PHINode *> CollisionMap;
-
-  // Examine each PHI.
-  for (BasicBlock::iterator I = BB->begin();
-       PHINode *PN = dyn_cast<PHINode>(I++); ) {
-    // Compute a hash value on the operands. Instcombine will likely have sorted
-    // them, which helps expose duplicates, but we have to check all the
-    // operands to be safe in case instcombine hasn't run.
-    uintptr_t Hash = 0;
-    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
-      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
-      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
-    }
-    // If we've never seen this hash value before, it's a unique PHI.
-    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
-      HashMap.insert(std::make_pair(Hash, PN));
-    if (Pair.second) continue;
-    // Otherwise it's either a duplicate or a hash collision.
-    for (PHINode *OtherPN = Pair.first->second; ; ) {
-      if (OtherPN->isIdenticalTo(PN)) {
-        // A duplicate. Replace this PHI with its duplicate.
-        PN->replaceAllUsesWith(OtherPN);
-        PN->eraseFromParent();
-        Changed = true;
-        break;
-      }
-      // A non-duplicate hash collision.
-      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
-      if (I == CollisionMap.end()) {
-        // Set this PHI to be the head of the linked list of colliding PHIs.
-        PHINode *Old = Pair.first->second;
-        Pair.first->second = PN;
-        CollisionMap[PN] = Old;
-        break;
-      }
-      // Procede to the next PHI in the list.
-      OtherPN = I->second;
-    }
-  }
-
-  return Changed;
-}
-
 /// SimplifyCFG - This function is used to do simplification of a CFG. For
 /// example, it adjusts branches to branches to eliminate the extra hop, it
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
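
With EliminateDuplicatePHINodes relocated from SimplifyCFG.cpp to Utils/Local.cpp, the helper becomes usable outside the CFG simplifier. A minimal sketch of such a caller, assuming the declaration is exported through llvm/Transforms/Utils/Local.h; the driver function dedupPHIsInFunction is hypothetical and not part of this patch:

#include "llvm/Function.h"
#include "llvm/Transforms/Utils/Local.h"

// Run the relocated helper over every block in a function, reporting
// whether any duplicate PHI node was folded away.
static bool dedupPHIsInFunction(llvm::Function &F) {
  bool Changed = false;
  for (llvm::Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    Changed |= llvm::EliminateDuplicatePHINodes(&*BB);
  return Changed;
}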