Diffstat (limited to 'lib/Transforms')
 lib/Transforms/IPO/CMakeLists.txt                  |   1
 lib/Transforms/IPO/GlobalOpt.cpp                   |   5
 lib/Transforms/IPO/PartialInlining.cpp             |   5
 lib/Transforms/IPO/RaiseAllocations.cpp            |   4
 lib/Transforms/Instrumentation/RSProfiling.cpp     |  12
 lib/Transforms/Scalar/CodeGenPrepare.cpp           |   4
 lib/Transforms/Scalar/GVN.cpp                      |  29
 lib/Transforms/Scalar/IndVarSimplify.cpp           |  13
 lib/Transforms/Scalar/InstructionCombining.cpp     | 394
 lib/Transforms/Scalar/JumpThreading.cpp            |  94
 lib/Transforms/Scalar/LoopIndexSplit.cpp           |   4
 lib/Transforms/Scalar/LoopStrengthReduce.cpp       | 151
 lib/Transforms/Scalar/ScalarReplAggregates.cpp     |   6
 lib/Transforms/Scalar/SimplifyLibCalls.cpp         |  93
 lib/Transforms/Scalar/TailRecursionElimination.cpp |  16
 lib/Transforms/Utils/Local.cpp                     |  45
 lib/Transforms/Utils/LowerAllocations.cpp          |   2
 lib/Transforms/Utils/SimplifyCFG.cpp               |  29
18 files changed, 555 insertions, 352 deletions
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4b85e13..1438b48 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMipo
   LoopExtractor.cpp
   LowerSetJmp.cpp
   MergeFunctions.cpp
+  PartialInlining.cpp
   PartialSpecialization.cpp
   PruneEH.cpp
   RaiseAllocations.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9a1b294..cbf3a1d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   //
   // NOTE: It doesn't make sense to promote non single-value types since we
   // are just replacing static memory to stack memory.
+  //
+  // If the global is in a different address space, don't bring it to stack.
   if (!GS.HasMultipleAccessingFunctions &&
       GS.AccessingFunction && !GS.HasNonInstructionUser &&
       GV->getType()->getElementType()->isSingleValueType() &&
       GS.AccessingFunction->getName() == "main" &&
-      GS.AccessingFunction->hasExternalLinkage()) {
+      GS.AccessingFunction->hasExternalLinkage() &&
+      GV->getType()->getAddressSpace() == 0) {
     DOUT << "LOCALIZING GLOBAL: " << *GV;
     Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
     const Type* ElemTy = GV->getType()->getElementType();
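The practical effect of the GlobalOpt change above, sketched in C++ (illustrative only, not part of the patch): an internal global in the default address space that only main touches can safely become a stack slot, while the new getAddressSpace() check leaves globals in other address spaces (e.g. special target memory) alone.

    // Before: an internal global accessed only from main...
    static int Counter;
    int main() { Counter = 42; return Counter; }

    // ...is localized by GlobalOpt into, conceptually:
    int main() { int Counter = 42; return Counter; }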
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b3a25540..0b975ae 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -20,10 +20,13 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/CFG.h"
 using namespace llvm;

+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
 namespace {
   struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
@@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
   duplicateFunction->replaceAllUsesWith(F);
   duplicateFunction->eraseFromParent();

+  ++NumPartialInlined;
+
   return extractedFunction;
 }
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index a81bbdb..8c97b5d 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) {
     // Check to see if we got the expected malloc
     if (TyWeHave != Malloc1Type) {
-      // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+      // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
       // This handles the common declaration of: 'void *malloc(unsigned);'
       const FunctionType *Malloc2Type =
         FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
                           std::vector<const Type*>(1, Type::Int32Ty), false);
       if (TyWeHave != Malloc2Type) {
         // Check to see if the prototype is missing, giving us
-        // sbyte*(...) * malloc
+        // i8*(...) * malloc
         // This handles the common declaration of: 'void *malloc();'
         const FunctionType *Malloc3Type =
           FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index c6cf4df..b110f4e 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -108,9 +108,9 @@ namespace {
   class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
     GlobalVariable* Counter;
     Value* ResetValue;
-    const Type* T;
+    const IntegerType* T;
   public:
-    GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+    GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
     virtual ~GlobalRandomCounter();
     virtual void PrepFunction(Function* F);
     virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -121,9 +121,9 @@ namespace {
     GlobalVariable* Counter;
     Value* ResetValue;
     AllocaInst* AI;
-    const Type* T;
+    const IntegerType* T;
   public:
-    GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+    GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
     virtual ~GlobalRandomCounterOpt();
     virtual void PrepFunction(Function* F);
     virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges);
 // Methods of choosing when to profile
 ///////////////////////////////////////

-GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
                                          uint64_t resetval) : T(t) {
   ConstantInt* Init = ConstantInt::get(T, resetval);
   ResetValue = Init;
@@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
   ReplacePhiPred(oldnext, bb, resetblock);
 }

-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
                                                uint64_t resetval)
   : AI(0), T(t) {
   ConstantInt* Init = ConstantInt::get(T, resetval);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42978e7..e9bee64 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,

 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, int->uint, or
-/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
 /// registers that must be created and coalesced.
 ///
 /// Return true if any changes are made.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 673d38b..f4a9898 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cstdio>
 using namespace llvm;

@@ -48,7 +49,7 @@ STATISTIC(NumPRELoad,   "Number of loads PRE'd");

 static cl::opt<bool> EnablePRE("enable-pre",
                                cl::init(true), cl::Hidden);
-cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
@@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,

   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].second.isClobber())
+  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+    DEBUG(
+      DOUT << "GVN: non-local load ";
+      WriteAsOperand(*DOUT.stream(), LI);
+      DOUT << " is clobbered by " << *Deps[0].second.getInst();
+    );
     return false;
+  }

   // Filter out useless results (non-locals, etc).  Keep track of the blocks
   // where we have a value available in repl, also keep track of whether we see
@@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   BasicBlock *TmpBB = LoadBB;

   bool isSinglePred = false;
+  bool allSingleSucc = true;
   while (TmpBB->getSinglePredecessor()) {
     isSinglePred = true;
     TmpBB = TmpBB->getSinglePredecessor();
@@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       return false;
     if (Blockers.count(TmpBB))
       return false;
+    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+      allSingleSucc = false;
   }

   assert(TmpBB);
@@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
           << UnavailablePred->getName() << "': " << *LI);
     return false;
   }
-
+
+  // Make sure it is valid to move this load here.  We have to watch out for:
+  //  @1 = getelementptr (i8* p, ...
+  //  test p and branch if == 0
+  //  load @1
+  // It is valid to have the getelementptr before the test, even if p can be 0,
+  // as getelementptr only does address arithmetic.
+  // If we are not pushing the value through any multiple-successor blocks
+  // we do not have this case.  Otherwise, check that the load is safe to
+  // put anywhere; this can be improved, but should be conservatively safe.
+  if (!allSingleSucc &&
+      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+    return false;
+
   // Okay, we can eliminate this load by inserting a reload in the predecessor
   // and using PHI construction to get the value in the other predecessors, do
   // it.
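The hazard the new allSingleSucc/isSafeToLoadUnconditionally check in GVN guards against, restated as a small C++ sketch (illustrative only; it mirrors the example in the hunk comment above):

    int f(int *p) {
      int *q = p + 4;        // address arithmetic alone never traps
      if (!p) return 0;      // a multiple-successor block between the gep...
      return *q;             // ...and the load; PRE must not hoist *q above the test
    }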
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38b1198..326fb38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
 //      which starts at zero and steps by one.
 //   2. The canonical induction variable is guaranteed to be the first PHI node
 //      in the loop header block.
-//   3. Any pointer arithmetic recurrences are raised to use array subscripts.
+//   3. The canonical induction variable is guaranteed to be in a wide enough
+//      type so that IV expressions need not be (directly) zero-extended or
+//      sign-extended.
+//   4. Any pointer arithmetic recurrences are raised to use array subscripts.
 //
 // If the trip count of a loop is computable, this pass also makes the following
 // changes:
@@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
         // If this instruction is dead now, delete it.
         RecursivelyDeleteTriviallyDeadInstructions(Inst);

-        // See if this is a single-entry LCSSA PHI node.  If so, we can (and
-        // have to) remove
-        // the PHI entirely.  This is safe, because the NewVal won't be variant
+        // If we're inserting code into the exit block rather than the
+        // preheader, we can (and have to) remove the PHI entirely.
+        // This is safe, because the NewVal won't be variant
         // in the loop, so we don't need an LCSSA phi node anymore.
-        if (NumPreds == 1) {
+        if (ExitBlocks.size() == 1) {
           PN->replaceAllUsesWith(ExitVal);
           RecursivelyDeleteTriviallyDeadInstructions(PN);
           break;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 5465e4a..5bd17e0 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -390,7 +390,7 @@ namespace {
     Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);

-    bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+    bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
                                     unsigned CastOpc, int &NumCastsRemoved);

     unsigned GetOrEnforceKnownAlignment(Value *V, unsigned PrefAlign = 0);
@@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) {
 }

 /// AddOne - Add one to a ConstantInt
-static ConstantInt *AddOne(ConstantInt *C) {
-  APInt Val(C->getValue());
-  return ConstantInt::get(++Val);
+static Constant *AddOne(Constant *C) {
+  return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
 }
 /// SubOne - Subtract one from a ConstantInt
-static ConstantInt *SubOne(ConstantInt *C) {
-  APInt Val(C->getValue());
-  return ConstantInt::get(--Val);
-}
-/// Add - Add two ConstantInts together
-static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
-  return ConstantInt::get(C1->getValue() + C2->getValue());
-}
-/// And - Bitwise AND two ConstantInts together
-static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
-  return ConstantInt::get(C1->getValue() & C2->getValue());
-}
-/// Subtract - Subtract one ConstantInt from another
-static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
-  return ConstantInt::get(C1->getValue() - C2->getValue());
-}
-/// Multiply - Multiply two ConstantInts together
-static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
-  return ConstantInt::get(C1->getValue() * C2->getValue());
+static Constant *SubOne(ConstantInt *C) {
+  return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
 }
 /// MultiplyOverflows - True if the multiply can not be expressed in an int
 /// this size.
@@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
 /// SimplifyDemandedBits knows about.  See if the instruction has any
 /// properties that allow us to simplify its operands.
 bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
-  unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+  unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   const Type *VTy = V->getType();
   assert((TD || !isa<PointerType>(VTy)) &&
          "SimplifyDemandedBits needs to know bit widths!");
-  assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
-         (!isa<IntegerType>(VTy) ||
-          VTy->getPrimitiveSizeInBits() == BitWidth) &&
+  assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+         (!VTy->isIntOrIntVector() ||
+          VTy->getScalarSizeInBits() == BitWidth) &&
          KnownZero.getBitWidth() == BitWidth &&
         KnownOne.getBitWidth() == BitWidth &&
-         "Value *V, DemandedMask, KnownZero and KnownOne \
-          must have same BitWidth");
+         "Value *V, DemandedMask, KnownZero and KnownOne "
+         "must have same BitWidth");
   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     // We know all of the bits for a constant!
     KnownOne = CI->getValue() & DemandedMask;
@@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     RHSKnownZero &= LHSKnownZero;
     break;
   case Instruction::Trunc: {
-    unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+    unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
     DemandedMask.zext(truncBf);
     RHSKnownZero.zext(truncBf);
     RHSKnownOne.zext(truncBf);
@@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::ZExt: {
     // Compute the bits in the result that are not present in the input.
-    unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();

     DemandedMask.trunc(SrcBitWidth);
     RHSKnownZero.trunc(SrcBitWidth);
@@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   }
   case Instruction::SExt: {
     // Compute the bits in the result that are not present in the input.
-    unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+    unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();

     APInt InputDemandedBits = DemandedMask &
                               APInt::getLowBitsSet(BitWidth, SrcBitWidth);
@@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
       APInt RA = Rem->getValue().abs();
       if (RA.isPowerOf2()) {
-        if (DemandedMask.ule(RA))    // srem won't affect demanded bits
+        if (DemandedMask.ult(RA))    // srem won't affect demanded bits
          return I->getOperand(0);

         APInt LowBits = RA - 1;
@@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {

   // See if SimplifyDemandedBits can simplify this.  This handles stuff like
   // (X & 254)+1 -> (X&254)|1
-  if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+  if (SimplifyDemandedInstructionBits(I))
     return &I;

   // zext(i1) - 1  ->  select i1, 0, -1
@@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       Value *XorLHS = 0;
       if (isa<ConstantInt>(RHSC) &&
           match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
-        uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+        uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
         const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();

         uint32_t Size = TySizeBits / 2;
@@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // X*C1 + X*C2 --> X * (C1+C2)
       ConstantInt *C1;
       if (X == dyn_castFoldableMul(RHS, C1))
-        return BinaryOperator::CreateMul(X, Add(C1, C2));
+        return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
     }

     // X + X*C --> X * (C+1)
@@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {

   // (X & FF00) + xx00  -> (X+xx00) & FF00
   if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
-    Constant *Anded = And(CRHS, C2);
+    Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
     if (Anded == CRHS) {
       // See if all bits from the first bit set in the Add RHS up are included
       // in the mask.  First, get the rightmost bit.
@@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   }

   // add (cast *A to intptrtype) B ->
-  //   cast (GEP (cast *A to sbyte*) B)  -->  intptrtype
+  //   cast (GEP (cast *A to i8*) B)  -->  intptrtype
   {
     CastInst *CI = dyn_cast<CastInst>(LHS);
     Value *Other = RHS;
@@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       Other = LHS;
     }
     if (CI && CI->getType()->isSized() &&
-        (CI->getType()->getPrimitiveSizeInBits() ==
+        (CI->getType()->getScalarSizeInBits() ==
          TD->getIntPtrType()->getPrimitiveSizeInBits()) &&
         isa<PointerType>(CI->getOperand(0)->getType())) {
       unsigned AS =
@@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
       if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
         // C1-(X+C2) --> (C1-C2)-X
-        return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+        return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
                                          Op1I->getOperand(0));
     }
   }
@@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       // X - X*C --> X * (1-C)
       ConstantInt *C2 = 0;
       if (dyn_castFoldableMul(Op1I, C2) == Op0) {
-        Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+        Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+                                             C2);
         return BinaryOperator::CreateMul(Op0, CP1);
       }
     }
@@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2)
     if (X == dyn_castFoldableMul(Op1, C2))
-      return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
   }
   return 0;
 }
@@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
   // (sdiv X, X) --> 1     (udiv X, X) --> 1
   if (Op0 == Op1) {
     if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
-      ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
       std::vector<Constant*> Elts(Ty->getNumElements(), CI);
       return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
     }

-    ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+    Constant *CI = ConstantInt::get(I.getType(), 1);
     return ReplaceInstUsesWith(I, CI);
   }
@@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
         return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
       else
         return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
-                                      Multiply(RHS, LHSRHS));
+                                      ConstantExpr::getMul(RHS, LHSRHS));
     }

   if (!RHS->isZero()) { // avoid X udiv 0
@@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   Value *X = Op->getOperand(0);
   Constant *Together = 0;
   if (!Op->isShift())
-    Together = And(AndRHS, OpRHS);
+    Together = ConstantExpr::getAnd(AndRHS, OpRHS);

   switch (Op->getOpcode()) {
   case Instruction::Xor:
@@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
   switch (LHSI->getOpcode()) {
   default: return 0;
   case Instruction::And:
-    if (And(N, Mask) == Mask) {
+    if (ConstantExpr::getAnd(N, Mask) == Mask) {
       // If the AndRHS is a power of two minus one (0+1+), this is simple.
       if ((Mask->getValue().countLeadingZeros() +
            Mask->getValue().countPopulation()) ==
@@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
     // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
     if ((Mask->getValue().countLeadingZeros() +
          Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
-        && And(N, Mask)->isZero())
+        && ConstantExpr::getAnd(N, Mask)->isNullValue())
       break;
     return 0;
   }
@@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {

   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
-  if (!isa<VectorType>(I.getType())) {
-    if (SimplifyDemandedInstructionBits(I))
-      return &I;
-  } else {
+  if (SimplifyDemandedInstructionBits(I))
+    return &I;
+  if (isa<VectorType>(I.getType())) {
     if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
       if (CP->isAllOnesValue())            // X & <-1,-1> -> X
         return ReplaceInstUsesWith(I, I.getOperand(0));
@@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       return ReplaceInstUsesWith(I, Op1);  // X & <0,0> -> <0,0>
     }
   }
-
+
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
     const APInt& AndRHSMask = AndRHS->getValue();
     APInt NotAndRHS(~AndRHSMask);
@@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
         Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
                                                      Val->getName()+".off");
         InsertNewInstBefore(Add, I);
-        AddCST = Subtract(AddOne(RHSCst), LHSCst);
+        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
         return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
       }
       break;                         // (X == 13 | X == 15) -> no change
@@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {

   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
-  if (!isa<VectorType>(I.getType())) {
-    if (SimplifyDemandedInstructionBits(I))
-      return &I;
-  } else if (isa<ConstantAggregateZero>(Op1)) {
-    return ReplaceInstUsesWith(I, Op0);  // X | <0,0> -> X
-  } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
-    if (CP->isAllOnesValue())            // X | <-1,-1> -> <-1,-1>
-      return ReplaceInstUsesWith(I, I.getOperand(1));
+  if (SimplifyDemandedInstructionBits(I))
+    return &I;
+  if (isa<VectorType>(I.getType())) {
+    if (isa<ConstantAggregateZero>(Op1)) {
+      return ReplaceInstUsesWith(I, Op0);  // X | <0,0> -> X
+    } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+      if (CP->isAllOnesValue())            // X | <-1,-1> -> <-1,-1>
+        return ReplaceInstUsesWith(I, I.getOperand(1));
+    }
   }
-
-  // or X, -1 == -1
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     ConstantInt *C1 = 0; Value *X = 0;
@@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {

   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
-  if (!isa<VectorType>(I.getType())) {
-    if (SimplifyDemandedInstructionBits(I))
-      return &I;
-  } else if (isa<ConstantAggregateZero>(Op1)) {
-    return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X
-  }
+  if (SimplifyDemandedInstructionBits(I))
+    return &I;
+  if (isa<VectorType>(I.getType()))
+    if (isa<ConstantAggregateZero>(Op1))
+      return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X

   // Is this a ~ operation?
   if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
           Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
           // Anything in both C1 and C2 is known to be zero, remove it from
           // NewRHS.
-          Constant *CommonBits = And(Op0CI, RHS);
+          Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
           NewRHS = ConstantExpr::getAnd(NewRHS,
                                         ConstantExpr::getNot(CommonBits));
           AddToWorkList(Op0I);
@@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
   return Changed ? &I : 0;
 }

-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
-                            ConstantInt *In2, bool IsSigned = false) {
-  Result = cast<ConstantInt>(Add(In1, In2));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+
+static bool HasAddOverflow(ConstantInt *Result,
+                           ConstantInt *In1, ConstantInt *In2,
+                           bool IsSigned) {
   if (IsSigned)
     if (In2->getValue().isNegative())
       return Result->getValue().sgt(In1->getValue());
@@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
     return Result->getValue().ult(In1->getValue());
 }

-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
 /// overflowed for this type.
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
-                            ConstantInt *In2, bool IsSigned = false) {
-  Result = cast<ConstantInt>(Subtract(In1, In2));
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+                            Constant *In2, bool IsSigned = false) {
+  Result = ConstantExpr::getAdd(In1, In2);
+
+  if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+      Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+      if (HasAddOverflow(ExtractElement(Result, Idx),
+                         ExtractElement(In1, Idx),
+                         ExtractElement(In2, Idx),
+                         IsSigned))
+        return true;
+    }
+    return false;
+  }
+
+  return HasAddOverflow(cast<ConstantInt>(Result),
+                        cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+                        IsSigned);
+}
+
+static bool HasSubOverflow(ConstantInt *Result,
+                           ConstantInt *In1, ConstantInt *In2,
+                           bool IsSigned) {
   if (IsSigned)
     if (In2->getValue().isNegative())
       return Result->getValue().slt(In1->getValue());
@@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
     return Result->getValue().ugt(In1->getValue());
 }

+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+                            Constant *In2, bool IsSigned = false) {
+  Result = ConstantExpr::getSub(In1, In2);
+
+  if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+      Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+      if (HasSubOverflow(ExtractElement(Result, Idx),
+                         ExtractElement(In1, Idx),
+                         ExtractElement(In2, Idx),
+                         IsSigned))
+        return true;
+    }
+    return false;
+  }
+
+  return HasSubOverflow(cast<ConstantInt>(Result),
+                        cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+                        IsSigned);
+}
+
 /// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
 /// code necessary to compute the offset from the base pointer (without adding
 /// in the base pointer).  Return the result as a signed integer of intptr size.
@@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   // Check to see that the input is converted from an integer type that is small
   // enough that preserves all bits.  TODO: check here for "known" sign bits.
   // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
-  unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+  unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();

   // If this is a uitofp instruction, we need an extra bit to hold the sign.
   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
@@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,

   // See if the FP constant is too large for the integer.  For example,
   // comparing an i8 to 300.0.
-  unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+  unsigned IntWidth = IntTy->getScalarSizeInBits();

   if (!LHSUnsigned) {
     // If the RHS value is > SignedMax, fold the comparison.  This handles +INF
@@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {

   unsigned BitWidth = 0;
   if (TD)
-    BitWidth = TD->getTypeSizeInBits(Ty);
-  else if (isa<IntegerType>(Ty))
-    BitWidth = Ty->getPrimitiveSizeInBits();
+    BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+  else if (Ty->isIntOrIntVector())
+    BitWidth = Ty->getScalarSizeInBits();

   bool isSignBit = false;
@@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
   // C2 (CI). By solving for X we can turn this into a range check
   // instead of computing a divide.
-  ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);

   // Determine if the product overflows by seeing if the product is
   // not equal to the divide. Make sure we do the same kind of divide
@@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
   // overflow variable is set to 0 if its corresponding bound variable is valid
   // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
   int LoOverflow = 0, HiOverflow = 0;
-  ConstantInt *LoBound = 0, *HiBound = 0;
+  Constant *LoBound = 0, *HiBound = 0;

   if (!DivIsSigned) {  // udiv
     // e.g. X/5 op 3  --> [15, 20)
@@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
         if (BO->hasOneUse())
           return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
-                              Subtract(RHS, BOp1C));
+                              ConstantExpr::getSub(RHS, BOp1C));
       } else if (RHSV == 0) {
         // Replace ((add A, B) != 0) with (A != -B) if A or B is
         // efficiently invertible, or if the add has just this one use.
@@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
     if (Res2 == CI) {
       // Make sure that sign of the Cmp and the sign of the Cast are the same.
       // For example, we might have:
-      //    %A = sext short %X to uint
-      //    %B = icmp ugt uint %A, 1330
+      //    %A = sext i16 %X to i32
+      //    %B = icmp ugt i32 %A, 1330
       // It is incorrect to transform this into
-      //    %B = icmp ugt short %X, 1330
+      //    %B = icmp ugt i16 %X, 1330
       // because %A may have negative value.
       //
       // However, we allow this when the compare is EQ/NE, because they are
@@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
   if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
     if (CSI->isAllOnesValue())
       return ReplaceInstUsesWith(I, CSI);
-
+
   // See if we can turn a signed shr into an unsigned shr.
-  if (!isa<VectorType>(I.getType())) {
-    if (MaskedValueIsZero(Op0,
-                          APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
-      return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
-    // Arithmetic shifting an all-sign-bit value is a no-op.
-    unsigned NumSignBits = ComputeNumSignBits(Op0);
-    if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
-      return ReplaceInstUsesWith(I, Op0);
-  }
+  if (MaskedValueIsZero(Op0,
+                        APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+    return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+  // Arithmetic shifting an all-sign-bit value is a no-op.
+  unsigned NumSignBits = ComputeNumSignBits(Op0);
+  if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+    return ReplaceInstUsesWith(I, Op0);

   return 0;
 }
@@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   }

   // See if we can fold away this shift.
-  if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+  if (SimplifyDemandedInstructionBits(I))
     return &I;

   // Try to fold constant and into select arguments.
@@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,

   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
-  uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();

-  // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
-  // of a signed value.
+  // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+  // a signed shift.
   //
   if (Op1->uge(TypeBits)) {
     if (I.getOpcode() != Instruction::AShr)
@@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       // part of the register be zeros.  Emulate this by inserting an AND to
       // clear the top bits as needed.  This 'and' will usually be zapped by
       // other xforms later if dead.
-      unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
-      unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+      unsigned DstSize = TI->getType()->getScalarSizeInBits();
       APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));

       // The mask we constructed says what the trunc would do if occurring
@@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     // If the allocation size is constant, form a constant mul expression
     Amt = ConstantInt::get(Type::Int32Ty, Scale);
     if (isa<ConstantInt>(NumElements))
-      Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+      Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+                                 cast<ConstantInt>(Amt));
     // otherwise multiply the amount and the number of elements
     else {
       Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
@@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
 /// If CastOpc is a sext or zext, we are asking if the low bits of the value can
 /// be computed in a larger type, which is then and'd or sext_in_reg'd to get
 /// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
                                               unsigned CastOpc,
                                               int &NumCastsRemoved){
   // We can always evaluate constants in another type.
-  if (isa<ConstantInt>(V))
+  if (isa<Constant>(V))
    return true;

   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;

-  const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+  const Type *OrigTy = V->getType();

   // If this is an extension or truncate, we can often eliminate it.
   if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
     // If we are truncating the result of this SHL, and if it's a shift of a
     // constant amount, we can always perform a SHL in a smaller type.
     if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint32_t BitWidth = Ty->getBitWidth();
-      if (BitWidth < OrigTy->getBitWidth() &&
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (BitWidth < OrigTy->getScalarSizeInBits() &&
          CI->getLimitedValue(BitWidth) < BitWidth)
        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
                                          NumCastsRemoved);
@@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
     // lshr iff we know that the bits we would otherwise be shifting in are
     // already zeros.
     if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
-      uint32_t OrigBitWidth = OrigTy->getBitWidth();
-      uint32_t BitWidth = Ty->getBitWidth();
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
       if (BitWidth < OrigBitWidth &&
          MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
@@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
   Value *Src = CI.getOperand(0);
   const Type *SrcTy = Src->getType();
   const Type *DestTy = CI.getType();
-  uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
-  uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+  uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+  uint32_t DestBitSize = DestTy->getScalarSizeInBits();

   // See if we can simplify any instructions used by the LHS whose sole
   // purpose is to compute bits we don't care about.
@@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
       // Only do this if the dest type is a simple type, don't convert the
       // expression tree to something weird like i93 unless the source is also
       // strange.
-      (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
-      CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+      (isSafeIntegerType(DestTy->getScalarType()) ||
+       !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+      CanEvaluateInDifferentType(SrcI, DestTy,
                                  CI.getOpcode(), NumCastsRemoved)) {
     // If this cast is a truncate, evaluating in a different type always
     // eliminates the cast, so it is always a win.  If this is a zero-extension,
@@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   Value *Src = CI.getOperand(0);
   const Type *Ty = CI.getType();
-  uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
-  uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+  uint32_t DestBitWidth = Ty->getScalarSizeInBits();
+  uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();

   // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
-  if (DestBitWidth == 1) {
+  if (DestBitWidth == 1 &&
+      isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
     Constant *One = ConstantInt::get(Src->getType(), 1);
     Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
-
+
   // Optimize trunc(lshr(), c) to pull the shift through the truncate.
   ConstantInt *ShAmtV = 0;
   Value *ShiftOp = 0;
@@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,

       Value *In = ICI->getOperand(0);
       Value *Sh = ConstantInt::get(In->getType(),
-                                   In->getType()->getPrimitiveSizeInBits()-1);
+                                   In->getType()->getScalarSizeInBits()-1);
       In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
                                                    In->getName()+".lobit"),
                                CI);
@@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     // Get the sizes of the types involved.  We know that the intermediate type
     // will be smaller than A or C, but don't know the relation between A and C.
     Value *A = CSrc->getOperand(0);
-    unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
-    unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
-    unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+    unsigned SrcSize = A->getType()->getScalarSizeInBits();
+    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+    unsigned DstSize = CI.getType()->getScalarSizeInBits();
     // If we're actually extending zero bits, then if
     // SrcSize <  DstSize: zext(a & mask)
     // SrcSize == DstSize: a & mask
     // SrcSize  > DstSize: trunc(a) & mask
     if (SrcSize < DstSize) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      Constant *AndConst = ConstantInt::get(AndValue);
+      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
       Instruction *And =
         BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
       InsertNewInstBefore(And, CI);
       return new ZExtInst(And, CI.getType());
     } else if (SrcSize == DstSize) {
       APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
-      return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+                                                           AndValue));
     } else if (SrcSize > DstSize) {
       Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
       InsertNewInstBefore(Trunc, CI);
       APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
-      return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+      return BinaryOperator::CreateAnd(Trunc,
+                                       ConstantInt::get(Trunc->getType(),
+                                                        AndValue));
     }
   }
@@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
     }
   }

+  // zext(trunc(t) & C) -> (t & zext(C)).
+  if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+      if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+        Value *TI0 = TI->getOperand(0);
+        if (TI0->getType() == CI.getType())
+          return
+            BinaryOperator::CreateAnd(TI0,
+                                      ConstantExpr::getZExt(C, CI.getType()));
+      }
+
+  // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+  if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+    if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+      if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+        if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+            And->getOperand(1) == C)
+          if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+            Value *TI0 = TI->getOperand(0);
+            if (TI0->getType() == CI.getType()) {
+              Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+              Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
+              InsertNewInstBefore(NewAnd, *And);
+              return BinaryOperator::CreateXor(NewAnd, ZC);
+            }
+          }
+
   return 0;
 }
@@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   // eliminate the trunc/sext pair.
   if (getOpcode(Src) == Instruction::Trunc) {
     Value *Op = cast<User>(Src)->getOperand(0);
-    unsigned OpBits   = cast<IntegerType>(Op->getType())->getBitWidth();
-    unsigned MidBits  = cast<IntegerType>(Src->getType())->getBitWidth();
-    unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+    unsigned OpBits   = Op->getType()->getScalarSizeInBits();
+    unsigned MidBits  = Src->getType()->getScalarSizeInBits();
+    unsigned DestBits = CI.getType()->getScalarSizeInBits();
     unsigned NumSignBits = ComputeNumSignBits(Op);

     if (OpBits == DestBits) {
@@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
       BA == CA && isa<TruncInst>(A)) {
     Value *I = cast<TruncInst>(A)->getOperand(0);
     if (I->getType() == CI.getType()) {
-      unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
-      unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+      unsigned MidSize = Src->getType()->getScalarSizeInBits();
+      unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
       unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
       Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
       I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
@@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
       Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
       if (LHSTrunc->getType() != SrcTy &&
           RHSTrunc->getType() != SrcTy) {
-        unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+        unsigned DstSize = CI.getType()->getScalarSizeInBits();
         // If the source types were both smaller than the destination type of
         // the cast, do this xform.
-        if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
-            RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+        if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+            RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
           LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
                                       CI.getType(), CI);
           RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
@@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
   // 'X' value would cause an undefined result for the fptoui.
   if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
       OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+      (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
       OpI->getType()->getFPMantissaWidth())
     return ReplaceInstUsesWith(FI, OpI->getOperand(0));

@@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
   // 'X' value would cause an undefined result for the fptoui.
   if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
       OpI->getOperand(0)->getType() == FI.getType() &&
-      (int)FI.getType()->getPrimitiveSizeInBits() <=
+      (int)FI.getType()->getScalarSizeInBits() <=
       OpI->getType()->getFPMantissaWidth())
     return ReplaceInstUsesWith(FI, OpI->getOperand(0));

@@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   // trunc to be exposed to other transforms.  Don't do this for extending
   // ptrtoint's, because we don't know if the target sign or zero extends its
   // pointers.
-  if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+  if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
     Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
                                                     TD->getIntPtrType(),
                                                     "tmp"), CI);
@@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
   // allows the trunc to be exposed to other transforms.  Don't do this for
   // extending inttoptr's, because we don't know if the target sign or zero
   // extends to pointers.
-  if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+  if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
       TD->getPointerSizeInBits()) {
     Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
                                                  TD->getIntPtrType(),
@@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
         (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
       Value *In = ICI->getOperand(0);
       Value *Sh = ConstantInt::get(In->getType(),
-                                   In->getType()->getPrimitiveSizeInBits()-1);
+                                   In->getType()->getScalarSizeInBits()-1);
       In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
                                                         In->getName()+".lobit"),
                                *ICI);
@@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
           // The comparison constant and the result are not necessarily the
           // same width. Make an all-ones value by inserting a AShr.
           Value *X = IC->getOperand(0);
-          uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+          uint32_t Bits = X->getType()->getScalarSizeInBits();
           Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
           Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
                                                     ShAmt, "ones");
@@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
 static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
                                    Instruction *InsertPoint,
                                    InstCombiner *IC) {
-  unsigned PtrSize = DTy->getPrimitiveSizeInBits();
-  unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+  unsigned PtrSize = DTy->getScalarSizeInBits();
+  unsigned VTySize = V->getType()->getScalarSizeInBits();
   // We must cast correctly to the pointer type. Ensure that we
   // sign extend the integer value if it is smaller as this is
   // used for address computation.
@@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           const Type *SrcTy = CI->getOperand(0)->getType();
           // We can eliminate a cast from i32 to i64 iff the target
           // is a 32-bit pointer target.
-          if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+          if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
            MadeChange = true;
            *i = CI->getOperand(0);
           }
@@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       ConstantInt *Scale = 0;
       if (ArrayEltSize == 1) {
         NewIdx = GEP.getOperand(1);
-        Scale = ConstantInt::get(NewIdx->getType(), 1);
+        Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
       } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
         NewIdx = ConstantInt::get(CI->getType(), 1);
         Scale = CI;
@@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                  isa<ConstantInt>(Inst->getOperand(1))) {
         ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
         uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
-        Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+        Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+                                 1ULL << ShAmtVal);
         NewIdx = Inst->getOperand(0);
       } else if (Inst->getOpcode() == Instruction::Mul &&
                  isa<ConstantInt>(Inst->getOperand(1))) {
@@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
   return 0;
 }

-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap.  If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
-  // If it is an alloca it is always safe to load from.
-  if (isa<AllocaInst>(V)) return true;
-
-  // If it is a global variable it is mostly safe to load from.
-  if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
-    // Don't try to evaluate aliases.  External weak GV can be null.
-    return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
-
-  // Otherwise, be a little bit aggressive by scanning the local block where we
-  // want to check to see if the pointer is already being loaded or stored
-  // from/to.  If so, the previous load or store would have already trapped,
-  // so there is no harm doing an extra load (also, CSE will later eliminate
-  // the load entirely).
-  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
-  while (BBI != E) {
-    --BBI;
-
-    // If we see a free or a call (which might do a free) the pointer could be
-    // marked invalid.
-    if (isa<FreeInst>(BBI) ||
-        (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
-      return false;
-
-    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI->getOperand(0) == V) return true;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
-      if (SI->getOperand(1) == V) return true;
-    }
-
-  }
-  return false;
-}
-
 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   Value *Op = LI.getOperand(0);
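A quick way to see that the new zext(trunc(t) & C) -> (t & zext(C)) fold added to visitZExt above is sound, written as standalone C++ (an assumed example, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Before the fold: truncate to i32, mask, then zero-extend back to i64.
    uint64_t before(uint64_t t) { return (uint64_t)((uint32_t)t & 0xFFu); }

    // After the fold: mask with the zero-extended constant in the wide type;
    // the trunc/zext pair disappears.
    uint64_t after(uint64_t t) { return t & 0xFFull; }

    int main() {
      assert(before(0x123456789ABCDEF0ull) == after(0x123456789ABCDEF0ull));
      return 0;
    }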
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c0ca2df..5a70fc3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -76,7 +76,7 @@ namespace {
     bool ProcessBlock(BasicBlock *BB);
     bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
                     unsigned JumpThreadCost);
-    BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+    BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
     bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
     bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) {
 /// This is important for things like "phi i1 [true, true, false, true, x]"
 /// where we only need to clone the block for the true blocks once.
 ///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
   SmallVector<BasicBlock*, 16> CommonPreds;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-    if (PN->getIncomingValue(i) == CstVal)
+    if (PN->getIncomingValue(i) == Val)
       CommonPreds.push_back(PN->getIncomingBlock(i));

   if (CommonPreds.size() == 1)
@@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
     }
   }

-  // If there is only a single predecessor of this block, nothing to fold.
-  if (BB->getSinglePredecessor())
-    return false;
-
   // All the rest of our checks depend on the condition being an instruction.
   if (CondInst == 0)
     return false;
@@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
        CondInst->getOpcode() == Instruction::And))
     return true;

-  // If we have "br (phi != 42)" and the phi node has any constant values as
-  // operands, we can thread through this block.
-  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
-    if (isa<PHINode>(CondCmp->getOperand(0)) &&
-        isa<Constant>(CondCmp->getOperand(1)) &&
-        ProcessBranchOnCompare(CondCmp, BB))
-      return true;
+  if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+    if (isa<PHINode>(CondCmp->getOperand(0))) {
+      // If we have "br (phi != 42)" and the phi node has any constant values
+      // as operands, we can thread through this block.
+      //
+      // If we have "br (cmp phi, x)" and the phi node contains x such that the
+      // comparison uniquely identifies the branch target, we can thread
+      // through this block.
+
+      if (ProcessBranchOnCompare(CondCmp, BB))
+        return true;
+    }
+
+    // If we have a comparison, loop over the predecessors to see if there is
+    // a condition with the same value.
+    pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+    for (; PI != E; ++PI)
+      if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+        if (PBI->isConditional() && *PI != BB) {
+          if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+            if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+                CI->getOperand(1) == CondCmp->getOperand(1) &&
+                CI->getPredicate() == CondCmp->getPredicate()) {
+              // TODO: Could handle things like (x != 4) --> (x == 17)
+              if (ProcessBranchOnDuplicateCond(*PI, BB))
+                return true;
+            }
+          }
+        }
+  }

   // Check for some cases that are worth simplifying.  Right now we want to look
   // for loads that are used by a switch or by the condition for the branch.  If
@@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
   return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
 }

+/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
+/// hand sides of the compare instruction, try to determine the result. If the
+/// result can not be determined, a null pointer is returned.
+static Constant *GetResultOfComparison(CmpInst::Predicate pred,
+                                       Value *LHS, Value *RHS) {
+  if (Constant *CLHS = dyn_cast<Constant>(LHS))
+    if (Constant *CRHS = dyn_cast<Constant>(RHS))
+      return ConstantExpr::getCompare(pred, CLHS, CRHS);
+
+  if (LHS == RHS)
+    if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
+      return ICmpInst::isTrueWhenEqual(pred) ?
+        ConstantInt::getTrue() : ConstantInt::getFalse();
+
+  return 0;
+}
+
 /// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a constant.  If the PHI node contains any constants as inputs, we
-/// can fold the compare for that edge and thread through it.
+/// node and a value.  If we can identify when the comparison is true between
+/// the phi inputs and the value, we can fold the compare for that edge and
+/// thread through it.
 bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
   PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
-  Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+  Value *RHS = Cmp->getOperand(1);

   // If the phi isn't in the current block, an incoming edge to this block
   // doesn't control the destination.
@@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {

   // We can do this simplification if any comparisons fold to true or false.
   // See if any do.
-  Constant *PredCst = 0;
+  Value *PredVal = 0;
   bool TrueDirection = false;
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-    PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
-    if (PredCst == 0) continue;
+    PredVal = PN->getIncomingValue(i);
+
+    Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS);
+    if (!Res) {
+      PredVal = 0;
+      continue;
+    }

-    Constant *Res;
-    if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
-      Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
-    else
-      Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
-                                  PredCst, RHS);
     // If this folded to a constant expr, we can't do anything.
     if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
       TrueDirection = ResC->getZExtValue();
@@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
     }

     // Otherwise, we can't fold this input.
-    PredCst = 0;
+    PredVal = 0;
   }

   // If no match, bail out.
-  if (PredCst == 0)
+  if (PredVal == 0)
     return false;

   // See if the cost of duplicating this block is low enough.
@@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {

   // If so, we can actually do this threading.  Merge any common predecessors
   // that will act the same.
-  BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+  BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);

   // Next, get our successor.
   BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 9c78596..6f7a7f8 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {

 // Return V+1
 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
-  ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
   return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
 }

 // Return V-1
 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
-  ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+  Constant *One = ConstantInt::get(V->getType(), 1, Sign);
   return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
 }
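A C-level picture of what the generalized ProcessBranchOnCompare/GetResultOfComparison above can now thread (a hypothetical example, not from the patch): the right-hand side of the compare no longer has to be a constant; it is enough that one phi input makes the result known, as when a phi input is the compared value itself:

    int pick(int x, int c) {
      int v = c ? x : 17;   // lowered to a phi of [x, 17]
      if (v == x)           // on the edge where v is x, this folds to true,
        return 1;           // so that predecessor can branch here directly
      return 2;
    }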
- bool isSigned; - /// Imm - The immediate value that should be added to the base immediately /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that @@ -363,7 +356,6 @@ namespace { BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - isSigned(IVSU.isSigned()), Imm(SE->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} @@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase, NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); } - if (isSigned) - NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty); - else - NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty); - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); } @@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm, SCEVHandle Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - std::vector<SCEVHandle> NewOps; + SmallVector<SCEVHandle, 4> NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { @@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, +static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs, SCEVHandle Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { @@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs, SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end()); + SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. 
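A note on the container conversions running through the hunks just above and below: SmallVector<T, N> keeps its first N elements in inline storage, so the short operand lists typical of SCEV expressions avoid the unconditional heap allocation of std::vector. A rough sketch of the idiom, illustrative only (int stands in for SCEVHandle, and withNewStart is a hypothetical helper, not part of the patch):

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Rebuild an addrec-style operand list with a new start value, much as
    // the patch does around SE->getAddRecExpr. With 4 inline slots, the
    // common two- or three-operand case never allocates; longer lists spill
    // to the heap automatically. Assumes Ops is non-empty and that Ops[0]
    // holds the start value.
    static SmallVector<int, 4> withNewStart(const SmallVector<int, 4> &Ops,
                                            int NewStart) {
      SmallVector<int, 4> Result(Ops.begin(), Ops.end());
      Result[0] = NewStart;
      return Result;
    }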
- std::vector<SCEVHandle> UniqueSubExprs; + SmallVector<SCEVHandle, 16> UniqueSubExprs; - std::vector<SCEVHandle> SubExprs; + SmallVector<SCEVHandle, 16> SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) { return SC->getValue()->getValue().isNegative(); } -// CollectIVUsers - Transform our list of users and offsets to a bit more -// complex table. In this new vector, each 'BasedUser' contains 'Base', the base -// of the strided accesses, as well as the old information from Uses. We -// progressively move information from the Base field to the Imm field, until -// we eventually have the full access expression to rewrite the use. +/// CollectIVUsers - Transform our list of users and offsets to a bit more +/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base +/// of the strided accesses, as well as the old information from Uses. We +/// progressively move information from the Base field to the Imm field, until +/// we eventually have the full access expression to rewrite the use. SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride, IVUsersOfOneStride &Uses, Loop *L, @@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, if (!isa<PointerType>(NewCmpTy)) NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(ConstantInt::get(CmpTy, Scale))) - : SE->getConstant(ConstantInt::get(NewCmpIntTy, + SE->getConstant(CmpTy, Scale)) + : SE->getConstant(NewCmpIntTy, cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale)); + ->getSExtValue()*Scale); break; } } @@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, OldCond->replaceAllUsesWith(Cond); OldCond->eraseFromParent(); - IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false); + IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS); CondUse = &IU->IVUsesByStride[*NewStride]->Users.back(); CondStride = NewStride; ++NumEliminated; @@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, return Cond; } -/// OptimizeSMax - Rewrite the loop's terminating condition if it uses -/// an smax computation. +/// OptimizeMax - Rewrite the loop's terminating condition if it uses +/// a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// p[i] = 0.0; /// } while (++i < n); /// -/// where the comparison is signed, the trip count isn't just 'n', because -/// 'n' could be negative. And unfortunately this can come up even for loops -/// where the user didn't use a C do-while loop. For example, seemingly -/// well-behaved top-test loops will commonly be lowered like this: +/// the trip count isn't just 'n', because 'n' might not be positive. And +/// unfortunately this can come up even for loops where the user didn't use +/// a C do-while loop. 
For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
///
/// if (n > 0) {
/// i = 0;
/// do {
/// p[i] = 0.0;
/// } while (++i < n);
/// }
@@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
-/// signed-max expression, which allows it to give the loop a canonical
+/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
-/// smax = n < 1 ? 1 : n;
+/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
-/// } while (++i != smax);
+/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT or ICMP_ULT,
/// and deleting the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
 // Check that the loop matches the pattern we're looking for.
 if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
 Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
 SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
 // Check for a max calculation that matches the pattern.
- const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
- if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+ if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ return Cond;
+ const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
+ if (Max != SE->getSCEV(Sel)) return Cond;
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
- SCEVHandle SMaxLHS = SMax->getOperand(0);
- SCEVHandle SMaxRHS = SMax->getOperand(1);
- if (!SMaxLHS || SMaxLHS != One) return Cond;
+ SCEVHandle MaxLHS = Max->getOperand(0);
+ SCEVHandle MaxRHS = Max->getOperand(1);
+ if (!MaxLHS || MaxLHS != One) return Cond;
 // Check the relevant induction variable for conformance to
 // the pattern.
@@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
 // Check the right operand of the select, and remember it, as it will
 // be used in the new comparison instruction.
 Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
 NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
 NewRHS = Sel->getOperand(2);
 if (!NewRHS) return Cond;
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ CmpInst::Predicate Pred =
+ isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
 // Ok, everything looks ok to change the condition into an SLT/ULT or
 // SGE/UGE and delete the max calculation.
 ICmpInst *NewCond =
- new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
- CmpInst::ICMP_SLT : - CmpInst::ICMP_SGE, - Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); // Delete the max calculation instructions. Cond->replaceAllUsesWith(NewCond); @@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); if (!Init) continue; - ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { } } -// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -// uses in the loop, look to see if we can eliminate some, in favor of using -// common indvars for the different uses. +/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar +/// uses in the loop, look to see if we can eliminate some, in favor of using +/// common indvars for the different uses. void LoopStrengthReduce::OptimizeIndvars(Loop *L) { // TODO: implement optzns here. @@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // induction variable, to allow coalescing the live ranges for the IV into // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); - BasicBlock *ExitBlock = L->getExitingBlock(); - if (!ExitBlock) + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. - ExitBlock = LatchBlock; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator()); + ExitingBlock = LatchBlock; + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!TermBr) return; if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) @@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. - if (ExitBlock != LatchBlock) { + if (ExitingBlock != LatchBlock) { if (!Cond->hasOneUse()) // See below, we don't want the condition to be cloned. return; @@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { StrideNoReuse.insert(*CondStride); } - // If the trip count is computed in terms of an smax (due to ScalarEvolution + // If the trip count is computed in terms of a max (due to ScalarEvolution // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT instead of NE. - Cond = OptimizeSMax(L, Cond, CondUse); + // comparison to use SLT or ULT instead of NE. + Cond = OptimizeMax(L, Cond, CondUse); // If possible, change stride and operands of the compare instruction to // eliminate one stride. 
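To make the max-based trip count discussed above concrete: the unsigned analogue of the documented do-while pattern is what motivates generalizing OptimizeSMax into OptimizeMax. An illustrative loop, not taken from the patch:

    // With an unsigned induction variable, the guard indvars synthesizes is
    // a umax rather than an smax: this body runs umax(n, 1) times, so the
    // canonical exit test becomes "++i != umax(n, 1)", and OptimizeMax can
    // now rewrite that back into the ULT form written here.
    void zero_fill(float *p, unsigned n) {
      unsigned i = 0;
      do {
        p[i] = 0.0f;
      } while (++i < n);
    }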
- if (ExitBlock == LatchBlock) + if (ExitingBlock == LatchBlock) Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); // It's possible for the setcc instruction to be anywhere in the loop, and @@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Clone the IVUse, as the old use still exists! IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace(), - false); + CondUse->getOperandValToReplace()); CondUse = &IU->IVUsesByStride[*CondStride]->Users.back(); } } @@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { ++NumLoopCond; } -// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -// when to exit the loop is used only for that purpose, try to rearrange things -// so it counts down to a test against zero. +/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding +/// when to exit the loop is used only for that purpose, try to rearrange things +/// so it counts down to a test against zero. void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. @@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { Value *startVal = phi->getIncomingValue(inBlock); Value *endVal = Cond->getOperand(1); // FIXME check for case where both are constant - ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); + Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, endVal, startVal, "tmp", PreInsertPt); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7143c7b..d89790c 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy; - if (const VectorType *VTy = dyn_cast<VectorType>(ValTy)) - ValTy = VTy->getElementType(); - + const Type *ValTy = EltTy->getScalarType(); + // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 59989c9..bbcb792 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { TD->getIntPtrType(), PointerType::getUnqual(Type::Int8Ty), NULL); - return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCpy - Emit a call to the memcpy function to the builder. 
This always @@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, PointerType::getUnqual(Type::Int8Ty), Type::Int32Ty, TD->getIntPtrType(), NULL); - return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemCmp - Emit a call to the memcmp function. @@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, PointerType::getUnqual(Type::Int8Ty), PointerType::getUnqual(Type::Int8Ty), TD->getIntPtrType(), NULL); - return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitMemSet - Emit a call to the memset function @@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, NameBuffer[NameLen+1] = 0; Name = NameBuffer; } - + Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), NULL); - return B.CreateCall(Callee, Op, Name); + CallInst *CI = B.CreateCall(Callee, Op, Name); + + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); - Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty, - Type::Int32Ty, NULL); - B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar"); + Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, + Type::Int32Ty, NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, Type::Int32Ty, "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); } /// EmitPutS - Emit a call to the puts function. This assumes that Str is @@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::Int32Ty, - PointerType::getUnqual(Type::Int8Ty), NULL); - B.CreateCall(F, CastToCStr(Str, B), "puts"); + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + Type::Int32Ty, + PointerType::getUnqual(Type::Int8Ty), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + } /// EmitFPutC - Emit a call to the fputc function. 
This assumes that Char is
@@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
 if (isa<PointerType>(File->getType()))
 F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
 Type::Int32Ty, File->getType(), NULL);
-
 else
 F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
 File->getType(), NULL);
 Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
- B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
 }
 /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
@@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
 F = M->getOrInsertFunction("fputs", Type::Int32Ty,
 PointerType::getUnqual(Type::Int8Ty),
 File->getType(), NULL);
- B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
 }
 /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
@@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
 PointerType::getUnqual(Type::Int8Ty),
 TD->getIntPtrType(), TD->getIntPtrType(),
 File->getType(), NULL);
- B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(), 1), File);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
 }
 //===----------------------------------------------------------------------===//
@@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
 // strcmp(P, "x") -> memcmp(P, "x", 2)
 uint64_t Len1 = GetStringLength(Str1P);
 uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 || Len2) {
- // Choose the smallest Len excluding 0 which means 'unknown'.
- if (!Len1 || (Len2 && Len2 < Len1)) - Len1 = Len2; + if (Len1 && Len2) { return EmitMemCmp(Str1P, Str2P, - ConstantInt::get(TD->getIntPtrType(), Len1), B); + ConstantInt::get(TD->getIntPtrType(), + std::min(Len1, Len2)), B); } return 0; @@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); } - + if (LdExpArg) { const char *Name; if (Op->getType() == Type::FloatTy) @@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { Module *M = Caller->getParent(); Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), Type::Int32Ty,NULL); - return B.CreateCall2(Callee, One, LdExpArg); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; } return 0; } }; - //===---------------------------------------===// // Double -> Float Shrinking Optimizations for Unary Functions like 'floor' @@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy || FT->getParamType(0) != Type::DoubleTy) return 0; - + // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 682d069..34ee57c 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -52,6 +52,7 @@ #define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) { bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. - if (I->mayHaveSideEffects() || isa<LoadInst>(I)) + if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; + + if (LoadInst* L = dyn_cast<LoadInst>(I)) { + // Loads may always be moved above calls without side effects. + if (CI->mayHaveSideEffects()) { + // Non-volatile loads may be moved above a call with side effects if it + // does not write to memory and the load provably won't trap. + // FIXME: Writes to memory only matter if they may alias the pointer + // being loaded from. + if (CI->mayWriteToMemory() || + !isSafeToLoadUnconditionally(L->getPointerOperand(), L)) + return false; + } + } // Otherwise, if this is a side-effect free instruction, check to make sure // that it does not use the return value of the call. 
If it doesn't use the
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 94483b8..c7fff54 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
@@ -28,6 +29,50 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+ // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+ }
+ return false;
+}
+
+
+//===----------------------------------------------------------------------===//
 // Local constant propagation.
 //
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 3249895..9af47f5 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
 if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
 const Type *AllocTy = MI->getType()->getElementType();
- // malloc(type) becomes sbyte *malloc(size)
+ // malloc(type) becomes i8 *malloc(size)
 Value *MallocArg;
 if (LowerMallocArgToInteger)
 MallocArg = ConstantInt::get(Type::Int64Ty,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index bcc6b81..ee0f6a6 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
 return Changed;
 }
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
 /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
 /// BB2, hoist any common code in the two blocks up into the branch block. The
 /// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
 I1 = BB1_Itr++;
 while (isa<DbgInfoIntrinsic>(I2))
 I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalTo(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
 return false;
 // If we get here, we can hoist at least one instruction.
@@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
 return true;
HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
 // Okay, it is safe to hoist the terminator.
 Instruction *NT = I1->clone();
 BIParent->getInstList().insert(BI, NT);
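A closing note on the SimplifyCFG change: the hazard isSafeToHoistInvoke guards against is easiest to see in source form. The following is a hypothetical illustration (the real check inspects phi inputs in the successor blocks rather than C-level variables):

    // HoistThenElseCodeToIf merges identical instructions from the two arms
    // into the block above the branch; where the arms fed a phi node with
    // different values, it inserts a select after the hoisted copy.
    int before(bool c, int x, int y) {
      int r;
      if (c)
        r = x + 1;  // same opcode in both arms...
      else
        r = y + 1;  // ...but different operands, so a select is needed
      return r;
    }

    int after(bool c, int x, int y) {
      return (c ? x : y) + 1;  // the hoisted add, fed by the select
    }

    // An invoke is a terminator, so there is no point between it and the
    // original branch where such a select could be placed; when a successor
    // phi would require one, isSafeToHoistInvoke returns false.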