Diffstat (limited to 'lib/Transforms/InstCombine')
-rw-r--r-- | lib/Transforms/InstCombine/CMakeLists.txt | 8
-rw-r--r-- | lib/Transforms/InstCombine/InstCombine.h | 10
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 74
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 243
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 160
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCasts.cpp | 44
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCompares.cpp | 89
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 71
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 58
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSelect.cpp | 39
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineShifts.cpp | 83
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 97
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 375
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineWorklist.h | 4
-rw-r--r-- | lib/Transforms/InstCombine/InstructionCombining.cpp | 85
-rw-r--r-- | lib/Transforms/InstCombine/LLVMBuild.txt | 22
16 files changed, 889 insertions, 573 deletions
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index a46d5ad..d070ccc 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) - -add_llvm_library_dependencies(LLVMInstCombine - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 3808278..199df51 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -22,6 +22,7 @@ namespace llvm { class CallSite; class TargetData; + class TargetLibraryInfo; class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; + TargetLibraryInfo *TLI; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -92,9 +94,11 @@ public: bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const; - + TargetData *getTargetData() const { return TD; } + TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } + // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: // Return Value: @@ -287,9 +291,9 @@ public: return 0; // Don't do anything with FI } - void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, + void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); } bool MaskedValueIsZero(Value *V, const APInt &Mask, diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d10046c..05e702f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -136,6 +136,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } + + // If this is a xor that was canonicalized from a sub, turn it back into + // a sub and fuse this add with it. + if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { + IntegerType *IT = cast<IntegerType>(I.getType()); + APInt LHSKnownOne(IT->getBitWidth(), 0); + APInt LHSKnownZero(IT->getBitWidth(), 0); + ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); + if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), + XorLHS); + } } } @@ -189,14 +201,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // A+B --> A|B iff A and B have no bits set in common. 
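[Editorial illustration, not part of the patch] The "A+B --> A|B iff A and B have no bits set in common" rule above holds because addition generates no carries when the operands' set bits are disjoint; a minimal standalone C++ check with made-up values:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0xF0, b = 0x0F;       // disjoint set bits: a & b == 0
  assert((a & b) == 0);              // no bits in common, so no carries
  assert(a + b == (a | b));          // add and or agree (both 0xFF)
  return 0;
}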
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { - APInt Mask = APInt::getAllOnesValue(IT->getBitWidth()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); if (LHSKnownZero != 0) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) @@ -466,57 +477,57 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; - GetElementPtrInst *GEP = 0; - ConstantExpr *CstGEP = 0; - - // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo". + GEPOperator *GEP1 = 0, *GEP2 = 0; + // For now we require one side to be the base pointer "A" or a constant - // expression derived from it. - if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) { + // GEP derived from it. + if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { // (gep X, ...) - X if (LHSGEP->getOperand(0) == RHS) { - GEP = LHSGEP; + GEP1 = LHSGEP; Swapped = false; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) { - // (gep X, ...) - (ce_gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - LHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = LHSGEP; + } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { + // (gep X, ...) - (gep X, ...) + if (LHSGEP->getOperand(0)->stripPointerCasts() == + RHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = RHSGEP; + GEP1 = LHSGEP; Swapped = false; } } } - if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) { + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // X - (gep X, ...) if (RHSGEP->getOperand(0) == LHS) { - GEP = RHSGEP; + GEP1 = RHSGEP; Swapped = true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) { - // (ce_gep X, ...) - (gep X, ...) - if (CE->getOpcode() == Instruction::GetElementPtr && - RHSGEP->getOperand(0) == CE->getOperand(0)) { - CstGEP = CE; - GEP = RHSGEP; + } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { + // (gep X, ...) - (gep X, ...) + if (RHSGEP->getOperand(0)->stripPointerCasts() == + LHSGEP->getOperand(0)->stripPointerCasts()) { + GEP2 = LHSGEP; + GEP1 = RHSGEP; Swapped = true; } } } - if (GEP == 0) + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and + // multiple users. + if (GEP1 == 0 || + (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) return 0; // Emit the offset of the GEP and an intptr_t. - Value *Result = EmitGEPOffset(GEP); + Value *Result = EmitGEPOffset(GEP1); // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. - if (CstGEP) { - Value *CstOffset = EmitGEPOffset(CstGEP); - Result = Builder->CreateSub(Result, CstOffset); + if (GEP2) { + Value *Offset = EmitGEPOffset(GEP2); + Result = Builder->CreateSub(Result, Offset); } - // If we have p - gep(p, ...) then we have to negate the result. 
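[Editorial illustration, not part of the patch] The pointer-difference fold above emits the GEP's byte offset and negates it for the swapped "p - gep(p, ...)" form; a standalone C++ sketch of the arithmetic it preserves, using an ordinary array and an illustrative index:

#include <cassert>
#include <cstdint>

int main() {
  int A[16];
  intptr_t base  = reinterpret_cast<intptr_t>(A);
  intptr_t elem5 = reinterpret_cast<intptr_t>(&A[5]);
  // (gep A, 5) - A is the byte offset of element 5 ...
  assert(elem5 - base == 5 * static_cast<intptr_t>(sizeof(int)));
  // ... and A - (gep A, 5) is the same offset negated (the "Swapped" case).
  assert(base - elem5 == -(5 * static_cast<intptr_t>(sizeof(int))));
  return 0;
}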
if (Swapped) @@ -587,6 +598,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { ConstantInt *C2; if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); + + if (SimplifyDemandedInstructionBits(I)) + return &I; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 5e0bfe8..0dbe11d 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; @@ -62,50 +63,6 @@ static inline Value *dyn_castNotVal(Value *V) { return 0; } - -/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits -/// are carefully arranged to allow folding of expressions such as: -/// -/// (A < B) | (A > B) --> (A != B) -/// -/// Note that this is only valid if the first and second predicates have the -/// same sign. Is illegal to do: (A u< B) | (A s> B) -/// -/// Three bits are used to represent the condition, as follows: -/// 0 A > B -/// 1 A == B -/// 2 A < B -/// -/// <=> Value Definition -/// 000 0 Always false -/// 001 1 A > B -/// 010 2 A == B -/// 011 3 A >= B -/// 100 4 A < B -/// 101 5 A != B -/// 110 6 A <= B -/// 111 7 Always true -/// -static unsigned getICmpCode(const ICmpInst *ICI) { - switch (ICI->getPredicate()) { - // False -> 0 - case ICmpInst::ICMP_UGT: return 1; // 001 - case ICmpInst::ICMP_SGT: return 1; // 001 - case ICmpInst::ICMP_EQ: return 2; // 010 - case ICmpInst::ICMP_UGE: return 3; // 011 - case ICmpInst::ICMP_SGE: return 3; // 011 - case ICmpInst::ICMP_ULT: return 4; // 100 - case ICmpInst::ICMP_SLT: return 4; // 100 - case ICmpInst::ICMP_NE: return 5; // 101 - case ICmpInst::ICMP_ULE: return 6; // 110 - case ICmpInst::ICMP_SLE: return 6; // 110 - // True -> 7 - default: - llvm_unreachable("Invalid ICmp predicate!"); - return 0; - } -} - /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp /// predicate into a three bit mask. It also returns whether it is an ordered /// predicate by reference. @@ -130,31 +87,19 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { default: // Not expecting FCMP_FALSE and FCMP_TRUE; llvm_unreachable("Unexpected FCmp predicate!"); - return 0; } } -/// getICmpValue - This is the complement of getICmpCode, which turns an +/// getNewICmpValue - This is the complement of getICmpCode, which turns an /// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. -static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { - CmpInst::Predicate Pred; - switch (Code) { - default: assert(0 && "Illegal ICmp code!"); - case 0: // False. - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); - case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; - case 2: Pred = ICmpInst::ICMP_EQ; break; - case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; - case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; - case 5: Pred = ICmpInst::ICMP_NE; break; - case 6: Pred = Sign ? 
ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; - case 7: // True. - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); - } - return Builder->CreateICmp(Pred, LHS, RHS); +static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + ICmpInst::Predicate NewPred; + if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) + return NewConstant; + return Builder->CreateICmp(NewPred, LHS, RHS); } /// getFCmpValue - This is the complement of getFCmpCode, which turns an @@ -165,7 +110,7 @@ static Value *getFCmpValue(bool isordered, unsigned code, InstCombiner::BuilderTy *Builder) { CmpInst::Predicate Pred; switch (code) { - default: assert(0 && "Illegal FCmp code!"); + default: llvm_unreachable("Illegal FCmp code!"); case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; @@ -180,14 +125,6 @@ static Value *getFCmpValue(bool isordered, unsigned code, return Builder->CreateFCmp(Pred, LHS, RHS); } -/// PredicatesFoldable - Return true if both predicates match sign or if at -/// least one of them is an equality comparison (which is signless). -static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) { - return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) || - (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) || - (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1)); -} - // OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where // the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is // guaranteed to be a binary operator. @@ -558,6 +495,38 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, return result; } +/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z) +/// if possible. The returned predicate is either == or !=. Returns false if +/// decomposition fails. +static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, + Value *&X, Value *&Y, Value *&Z) { + // X < 0 is equivalent to (X & SignBit) != 0. + if (I->getPredicate() == ICmpInst::ICMP_SLT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isZero()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_NE; + Z = C; + return true; + } + + // X > -1 is equivalent to (X & SignBit) == 0. 
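[Editorial illustration, not part of the patch] The two equivalences decomposeBitTestICmp relies on, X s< 0 <=> (X & SignBit) != 0 and X s> -1 <=> (X & SignBit) == 0, checked on a few 32-bit values:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t SignBit = 0x80000000u;
  for (int32_t x : {-7, -1, 0, 1, 42}) {
    uint32_t u = static_cast<uint32_t>(x);
    assert((x < 0)  == ((u & SignBit) != 0));  // X s< 0  <=> sign bit set
    assert((x > -1) == ((u & SignBit) == 0));  // X s> -1 <=> sign bit clear
  }
  return 0;
}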
+ if (I->getPredicate() == ICmpInst::ICMP_SGT) + if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1))) + if (C->isAllOnesValue()) { + X = I->getOperand(0); + Y = ConstantInt::get(I->getContext(), + APInt::getSignBit(C->getBitWidth())); + Pred = ICmpInst::ICMP_EQ; + Z = ConstantInt::getNullValue(C->getType()); + return true; + } + + return false; +} + /// foldLogOpOfMaskedICmpsHelper: /// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// return the set of pattern classes (from MaskedICmpType) @@ -565,10 +534,9 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value*& B, Value*& C, Value*& D, Value*& E, - ICmpInst *LHS, ICmpInst *RHS) { - ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); - if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0; - if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0; + ICmpInst *LHS, ICmpInst *RHS, + ICmpInst::Predicate &LHSCC, + ICmpInst::Predicate &RHSCC) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0; // vectors are not (yet?) supported if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; @@ -582,40 +550,60 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value *L1 = LHS->getOperand(0); Value *L2 = LHS->getOperand(1); Value *L11,*L12,*L21,*L22; - if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { - if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + // Check whether the icmp can be decomposed into a bit test. + if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { + L21 = L22 = L1 = 0; + } else { + // Look for ANDs in the LHS icmp. + if (match(L1, m_And(m_Value(L11), m_Value(L12)))) { + if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) + L21 = L22 = 0; + } else { + if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) + return 0; + std::swap(L1, L2); L21 = L22 = 0; - } - else { - if (!match(L2, m_And(m_Value(L11), m_Value(L12)))) - return 0; - std::swap(L1, L2); - L21 = L22 = 0; + } } + // Bail if LHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(LHSCC)) + return 0; + Value *R1 = RHS->getOperand(0); Value *R2 = RHS->getOperand(1); Value *R11,*R12; bool ok = false; - if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R2; ok = true; + if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { + A = R12; D = R11; + } else { + return 0; } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + E = R2; R1 = 0; ok = true; + } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R2; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R2; ok = true; } } + + // Bail if RHS was a icmp that can't be decomposed into an equality. + if (!ICmpInst::isEquality(RHSCC)) + return 0; + + // Look for ANDs in on the right side of the RHS icmp. 
if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) { - if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) { - A = R11; D = R12; E = R1; ok = true; - } - else - if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) { + if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) { + A = R11; D = R12; E = R1; ok = true; + } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) { A = R12; D = R11; E = R1; ok = true; - } - else + } else { return 0; + } } if (!ok) return 0; @@ -644,8 +632,12 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, ICmpInst::Predicate NEWCC, llvm::InstCombiner::BuilderTy* Builder) { Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; - unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS); + ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); + unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, + LHSCC, RHSCC); if (mask == 0) return 0; + assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && + "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); if (NEWCC == ICmpInst::ICMP_NE) mask >>= 1; // treat "Not"-states as normal states @@ -693,11 +685,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *CCst = dyn_cast<ConstantInt>(C); if (CCst == 0) return 0; - if (LHS->getPredicate() != NEWCC) + if (LHSCC != NEWCC) CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast<ConstantInt>(E); if (ECst == 0) return 0; - if (RHS->getPredicate() != NEWCC) + if (RHSCC != NEWCC) ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast<ConstantInt>( ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst), @@ -728,7 +720,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - return getICmpValue(isSigned, Code, Op0, Op1, Builder); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -756,24 +748,12 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } - - // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } } // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 // where CMAX is the all ones value for the truncated type, // iff the lower bits of C2 and CA are zero. - if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && LHS->hasOneUse() && RHS->hasOneUse()) { Value *V; ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; @@ -805,7 +785,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } } } - + // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; @@ -1382,13 +1362,8 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, // part of the value (e.g. 
byte 3) then it must be shifted right. If from the // low part, it must be shifted left. unsigned DestByteNo = InputByteNo + OverallLeftShift; - if (InputByteNo < ByteValues.size()/2) { - if (ByteValues.size()-1-DestByteNo != InputByteNo) - return true; - } else { - if (ByteValues.size()-1-DestByteNo != InputByteNo) - return true; - } + if (ByteValues.size()-1-DestByteNo != InputByteNo) + return true; // If the destination byte value is already defined, the values are or'd // together, which isn't a bswap (unless it's an or of the same bits). @@ -1469,7 +1444,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - return getICmpValue(isSigned, Code, Op0, Op1, Builder); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -1490,18 +1465,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - - // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) - if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return Builder->CreateICmp(LHSCC, NewOr, LHSCst); - } - - // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) - if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { - Value *NewAnd = Builder->CreateAnd(Val, Val2); - return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); - } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) @@ -1586,7 +1549,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true return ConstantInt::getTrue(LHS->getContext()); } - break; case ICmpInst::ICMP_ULT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); @@ -1962,8 +1924,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // Canonicalize xor to the RHS. 
- if (match(Op0, m_Xor(m_Value(), m_Value()))) + bool SwappedForXor = false; + if (match(Op0, m_Xor(m_Value(), m_Value()))) { std::swap(Op0, Op1); + SwappedForXor = true; + } // A | ( A ^ B) -> A | B // A | (~A ^ B) -> A | ~B @@ -1994,6 +1959,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Not, Op0); } + if (SwappedForXor) + std::swap(Op0, Op1); + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) @@ -2281,7 +2249,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); return ReplaceInstUsesWith(I, - getICmpValue(isSigned, Code, Op0, Op1, Builder)); + getNewICmpValue(isSigned, Code, Op0, Op1, + Builder)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c7b3ff8..77e4727 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -37,26 +37,26 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); return MI; } - + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2)); if (MemOpLength == 0) return 0; - + // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. unsigned Size = MemOpLength->getZExtValue(); if (Size == 0) return MI; // Delete this mem transfer. - + if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. - + // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace(); @@ -66,7 +66,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); - + // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast // from double* to i8*. We'd much rather use a double load+store rather than @@ -94,20 +94,20 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } else break; } - + if (SrcETy->isSingleValueType()) { NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); } } } - - + + // If the memcpy/memmove provides better alignment info than we can // infer, use it. SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); @@ -127,7 +127,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Alignment, false)); return MI; } - + // Extract the length and alignment and fill if they are constant. 
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); @@ -135,14 +135,14 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); - + // If the length is zero, this is a no-op if (Len == 0) return MI; // memset(d,c,0,a) -> noop - + // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8. - + Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); @@ -150,13 +150,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; - + // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, MI->isVolatile()); S->setAlignment(Alignment); - + // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setLength(Constant::getNullValue(LenC->getType())); return MI; @@ -165,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return 0; } -/// visitCallInst - CallInst simplification. This mostly only handles folding +/// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. /// @@ -182,7 +182,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { CI.setDoesNotThrow(); return &CI; } - + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI); if (!II) return visitCallSite(&CI); @@ -203,7 +203,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // alignment is sufficient. } } - + // No other transformations apply to volatile transfers. if (MI->isVolatile()) return 0; @@ -242,13 +242,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } - + switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { // We need target data for just about everything so depend on it. if (!TD) break; - + Type *ReturnTy = CI.getType(); uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; @@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Get the current byte offset into the thing. Use the original // operand in case we're looking through a bitcast. 
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + if (!GEP->getPointerOperandType()->isPointerTy()) + return 0; Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); Op1 = GEP->getPointerOperand()->stripPointerCasts(); @@ -322,7 +324,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); - + // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) @@ -334,7 +336,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return new TruncInst(V, TI->getType()); } } - + break; case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { @@ -359,14 +361,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), - KnownZero, KnownOne); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) return ReplaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, TrailingZeros))); - + } break; case Intrinsic::ctlz: { @@ -378,31 +379,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), - KnownZero, KnownOne); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) return ReplaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, LeadingZeros))); - + } break; case Intrinsic::uadd_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); - APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; if (LHSKnownNegative || LHSKnownPositive) { APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; if (LHSKnownNegative && RHSKnownNegative) { @@ -448,7 +447,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X + undef -> undef if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X + 0 -> {X, false} if (RHS->isZero()) { @@ -469,7 +468,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isa<UndefValue>(II->getArgOperand(0)) || isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, 
UndefValue::get(II->getType())); - + if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X - 0 -> {X, false} if (RHS->isZero()) { @@ -477,7 +476,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } @@ -486,14 +485,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::umul_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); // Get the largest possible values for each operand. APInt LHSMax = ~LHSKnownZero; @@ -526,19 +524,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // X * undef -> undef if (isa<UndefValue>(II->getArgOperand(1))) return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // X*0 -> {0, false} if (RHSI->isZero()) return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); - + // X * 1 -> {X, false} if (RHSI->equalsInt(1)) { Constant *V[] = { UndefValue::get(II->getArgOperand(0)->getType()), ConstantInt::getFalse(II->getContext()) }; - Constant *Struct = + Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()), V); return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } @@ -557,7 +555,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_stvxl: // Turn stvx -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) { - Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); @@ -568,7 +566,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_storeu_dq: // Turn X86 storeu -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { - Type *OpPtrTy = + Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(1)->getType()); Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy); return new StoreInst(II->getArgOperand(1), Ptr); @@ -621,19 +619,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. - if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) { - assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!"); - + if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { + assert(Mask->getType()->getVectorNumElements() == 16 && + "Bad type for intrinsic!"); + // Check that all of the elements are integer constants or undefs. 
bool AllEltsOk = true; for (unsigned i = 0; i != 16; ++i) { - if (!isa<ConstantInt>(Mask->getOperand(i)) && - !isa<UndefValue>(Mask->getOperand(i))) { + Constant *Elt = Mask->getAggregateElement(i); + if (Elt == 0 || + !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) { AllEltsOk = false; break; } } - + if (AllEltsOk) { // Cast the input vectors to byte vectors. Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), @@ -641,23 +641,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); - + // Only extract each element once. Value *ExtractedElts[32]; memset(ExtractedElts, 0, sizeof(ExtractedElts)); - + for (unsigned i = 0; i != 16; ++i) { - if (isa<UndefValue>(Mask->getOperand(i))) + if (isa<UndefValue>(Mask->getAggregateElement(i))) continue; - unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue(); + unsigned Idx = + cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - + if (ExtractedElts[Idx] == 0) { - ExtractedElts[Idx] = + ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, Builder->getInt32(Idx&15)); } - + // Insert this value into the result vector. Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], Builder->getInt32(i)); @@ -703,7 +704,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); } } - + // Scan down this block to see if there is another stack restore in the // same block without an intervening call/alloca. BasicBlock::iterator BI = II; @@ -728,12 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } } - + // If the stack restore is in a return, resume, or unwind block and if there // are no allocas or calls between the restore and the return, nuke the // restore. - if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) || - isa<UnwindInst>(TI))) + if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI))) return EraseInstFromFunction(CI); break; } @@ -748,7 +748,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { return visitCallSite(&II); } -/// isSafeToEliminateVarargsCast - If this cast does not affect the value +/// isSafeToEliminateVarargsCast - If this cast does not affect the value /// passed through the varargs area, we can eliminate the use of the cast. static bool isSafeToEliminateVarargsCast(const CallSite CS, const CastInst * const CI, @@ -760,10 +760,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // The size of ByVal arguments is derived from the type, so we // can't change to a type with a different size. If the size were // passed explicitly we could avoid this check. - if (!CS.paramHasAttr(ix, Attribute::ByVal)) + if (!CS.isByValArgument(ix)) return true; - Type* SrcTy = + Type* SrcTy = cast<PointerType>(CI->getOperand(0)->getType())->getElementType(); Type* DstTy = cast<PointerType>(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) @@ -807,7 +807,7 @@ public: } // end anonymous namespace // Try to fold some different type of calls here. -// Currently we're only working with the checking functions, memcpy_chk, +// Currently we're only working with the checking functions, memcpy_chk, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. 
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { @@ -916,7 +916,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { !CalleeF->isDeclaration()) { Instruction *OldCall = CS.getInstruction(); new StoreInst(ConstantInt::getTrue(Callee->getContext()), - UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), + UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); // If OldCall dues not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. @@ -924,7 +924,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); if (isa<CallInst>(OldCall)) return EraseInstFromFunction(*OldCall); - + // We cannot remove an invoke, because it would change the CFG, just // change the callee to a null pointer. cast<InvokeInst>(OldCall)->setCalledFunction( @@ -960,7 +960,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); + int ix = FTy->getNumParams(); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), @@ -1061,17 +1061,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - unsigned Attrs = CallerPAL.getParamAttributes(i + 1); + Attributes Attrs = CallerPAL.getParamAttributes(i + 1); if (Attrs & Attribute::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. - + // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; - + Type *CurElTy = cast<PointerType>(ActTy)->getElementType(); if (TD->getTypeAllocSize(CurElTy) != TD->getTypeAllocSize(ParamPTy->getElementType())) @@ -1099,8 +1099,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType()); if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg()) return false; + + // If both the callee and the cast type are varargs, we still have to make + // sure the number of fixed parameters are the same or we have the same + // ABI issues as if we introduce a varargs call. + if (FT->isVarArg() && + cast<FunctionType>(APTy->getElementType())->isVarArg() && + FT->getNumParams() != + cast<FunctionType>(APTy->getElementType())->getNumParams()) + return false; } - + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -1114,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; } - + // Okay, we decided that this is a safe thing to do: go ahead and start // inserting cast instructions as necessary. std::vector<Value*> Args; @@ -1352,11 +1361,11 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. 
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); Constant *NewCallee = NestF->getType() == PointerType::getUnqual(NewFTy) ? - NestF : ConstantExpr::getBitCast(NestF, + NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), NewAttrs.end()); @@ -1385,9 +1394,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // parameter, there is no need to adjust the argument list. Let the generic // code sort out any function type mismatches. Constant *NewCallee = - NestF->getType() == PTy ? NestF : + NestF->getType() == PTy ? NestF : ConstantExpr::getBitCast(NestF, PTy); CS.setCalledFunction(NewCallee); return CS.getInstruction(); } - diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f10e48a..39279f4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } - - /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. @@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // If we got a constantexpr back, try to simplify it with TD info. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + C = ConstantFoldConstantExpression(CE, TD, TLI); return C; } @@ -216,7 +215,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); - break; } Res->takeName(I); @@ -528,9 +526,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return ReplaceInstUsesWith(CI, In); } - - - + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. // zext (X == 1) to i32 --> X iff X has only the low bit set. @@ -545,8 +541,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); + ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? 
@@ -594,9 +589,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -915,8 +909,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + ComputeMaskedBits(Op0, KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { @@ -1163,6 +1156,9 @@ static Value *LookThroughFPExtensions(Value *V) { if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) return V; // No constant folding of this. + // See if the value can be truncated to half and then reextended. + if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + return V; // See if the value can be truncated to float and then reextended. if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) return V; @@ -1213,10 +1209,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) - // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it. CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); - if (Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "sqrt" && + if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && + Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) && Call->getNumArgOperands() == 1 && Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); @@ -1423,16 +1418,15 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. - SmallVector<Constant*, 16> ShuffleMask; + SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext()); if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + ShuffleMask.push_back(i); } else { // If we're increasing the number of elements, shuffle in all of the @@ -1441,14 +1435,16 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, V2 = Constant::getNullValue(SrcTy); unsigned SrcElts = SrcTy->getNumElements(); for (unsigned i = 0, e = SrcElts; i != e; ++i) - ShuffleMask.push_back(ConstantInt::get(Int32Ty, i)); + ShuffleMask.push_back(i); // The excess elements reference the first element of the zero input. 
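[Editorial illustration, not part of the patch] How the growing-vector case builds its shuffle mask, e.g. widening a 2-element vector to 4: the low lanes come from the input, and the excess lanes all index element 0 of the zero vector (index SrcElts). The sizes here are illustrative:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  unsigned SrcElts = 2, DstElts = 4;
  std::vector<uint32_t> Mask;
  for (unsigned i = 0; i != SrcElts; ++i)
    Mask.push_back(i);                 // take the input lanes as-is
  for (unsigned i = 0; i != DstElts - SrcElts; ++i)
    Mask.push_back(SrcElts);           // excess lanes point at the zero vector
  assert((Mask == std::vector<uint32_t>{0, 1, 2, 2}));
  return 0;
}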
- ShuffleMask.append(DestTy->getNumElements()-SrcElts, - ConstantInt::get(Int32Ty, SrcElts)); + for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) + ShuffleMask.push_back(SrcElts); } - return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask)); + return new ShuffleVectorInst(InVal, V2, + ConstantDataVector::get(V2->getContext(), + ShuffleMask)); } static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bb1cbfa..ab2987f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -203,8 +203,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // We need TD information to know the pointer size unless this is inbounds. if (!GEP->isInBounds() && TD == 0) return 0; - ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer()); - if (Init == 0 || Init->getNumOperands() > 1024) return 0; + Constant *Init = GV->getInitializer(); + if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) + return 0; + + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. @@ -221,7 +225,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // structs. SmallVector<unsigned, 4> LaterIndices; - Type *EltTy = cast<ArrayType>(Init->getType())->getElementType(); + Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i)); if (Idx == 0) return 0; // Variable index. @@ -272,8 +276,9 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // Scan the array and see if one of our patterns matches. Constant *CompareRHS = cast<Constant>(ICI.getOperand(1)); - for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { - Constant *Elt = Init->getOperand(i); + for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { + Constant *Elt = Init->getAggregateElement(i); + if (Elt == 0) return 0; // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) @@ -284,7 +289,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, TD); + CompareRHS, TD, TLI); // If the result is undef for this element, ignore it. 
if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an @@ -440,10 +445,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If a 32-bit or 64-bit magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (Init->getNumOperands() <= 32 || - (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) { + if (ArrayElementCount <= 32 || + (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { Type *Ty; - if (Init->getNumOperands() <= 32) + if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); else Ty = Type::getInt64Ty(Init->getContext()); @@ -566,6 +571,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I) { + // Don't transform signed compares of GEPs into index compares. Even if the + // GEP is inbounds, the final add of the base pointer can have signed overflow + // and would change the result of the icmp. + // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be + // the maximum signed value for the pointer type. + if (ICmpInst::isSigned(Cond)) + return 0; + // Look through bitcasts. if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS)) RHS = BCI->getOperand(0); @@ -602,6 +615,20 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), GEPLHS->getOperand(0), GEPRHS->getOperand(0)); + // If we're comparing GEPs with two base pointers that only differ in type + // and both GEPs have only constant indices or just one use, then fold + // the compare with the adjusted indices. + if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() && + (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) && + (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) && + PtrBase->stripPointerCasts() == + GEPRHS->getOperand(0)->stripPointerCasts()) { + Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), + EmitGEPOffset(GEPLHS), + EmitGEPOffset(GEPRHS)); + return ReplaceInstUsesWith(I, Cmp); + } + // Otherwise, the base pointers are different and the indices are // different, bail out. return 0; @@ -1001,9 +1028,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the high bits truncated out of x are known. unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); + ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { @@ -1657,6 +1683,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) return 0; + // This is only really a signed overflow check if the inputs have been + // sign-extended; check for that condition. For example, if CI2 is 2^31 and + // the operands of the add are 64 bits wide, we need at least 33 sign bits. 
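[Editorial illustration, not part of the patch] A concrete instance of the sign-bit reasoning above: with a 64-bit add checked against 2^31, 64 - 32 + 1 = 33 sign bits are required of each operand, i.e. each must really be a sign-extended i32. A standalone sketch counting sign bits for known values:

#include <cassert>
#include <cstdint>

// Leading bits equal to the sign bit (a concrete analogue of what
// ComputeNumSignBits bounds for arbitrary values).
static unsigned numSignBits(int64_t v) {
  uint64_t u = static_cast<uint64_t>(v);
  unsigned sign = static_cast<unsigned>(u >> 63);
  unsigned n = 1;
  while (n < 64 && ((u >> (63 - n)) & 1) == sign)
    ++n;
  return n;
}

int main() {
  int64_t sextA = static_cast<int64_t>(static_cast<int32_t>(0x7FFFFFFF));
  int64_t wideB = int64_t(1) << 40;    // not a sign-extended i32
  assert(numSignBits(sextA) >= 33);    // meets the 33-sign-bit requirement
  assert(numSignBits(wideB) < 33);     // would be rejected by the new check
  return 0;
}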
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; + if (IC.ComputeNumSignBits(A) < NeededSignBits || + IC.ComputeNumSignBits(B) < NeededSignBits) + return 0; + // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant // and truncates that discard the high bits of the add. Verify that this is @@ -1787,6 +1821,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // comparing -val or val with non-zero is the same as just comparing val + // ie, abs(val) != 0 -> val != 0 + if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) + { + Value *Cond, *SelectTrue, *SelectFalse; + if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), + m_Value(SelectFalse)))) { + if (Value *V = dyn_castNegVal(SelectTrue)) { + if (V == SelectFalse) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + else if (Value *V = dyn_castNegVal(SelectFalse)) { + if (V == SelectTrue) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + } + } + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations @@ -2683,6 +2735,17 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); } + } else { + // See if the RHS value is < UnsignedMin. + APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false); + SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true, + APFloat::rmNearestTiesToEven); + if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 + if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || + Pred == ICmpInst::ICMP_UGE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + } } // Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or @@ -2822,7 +2885,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { const fltSemantics *Sem; // FIXME: This shouldn't be here. - if (LHSExt->getSrcTy()->isFloatTy()) + if (LHSExt->getSrcTy()->isHalfTy()) + Sem = &APFloat::IEEEhalf; + else if (LHSExt->getSrcTy()->isFloatTy()) Sem = &APFloat::IEEEsingle; else if (LHSExt->getSrcTy()->isDoubleTy()) Sem = &APFloat::IEEEdouble; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 7446a51..b2f2e24 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -22,6 +22,72 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +// Try to kill dead allocas by walking through its uses until we see some use +// that could escape. This is a conservative analysis which tries to handle +// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things +// left after inlining and SROA finish chewing on an alloca. 
+static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { + SmallVector<Instruction *, 4> Worklist, DeadStores; + Worklist.push_back(&AI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); + UI != UE; ++UI) { + Instruction *I = cast<Instruction>(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return 0; + + case Instruction::GetElementPtr: + case Instruction::BitCast: + Worklist.push_back(I); + continue; + + case Instruction::Call: + // We can handle a limited subset of calls to no-op intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + continue; + default: + return 0; + } + } + // Reject everything else. + return 0; + + case Instruction::Store: { + // Stores into the alloca are only live if the alloca is live. + StoreInst *SI = cast<StoreInst>(I); + // We can eliminate atomic stores, but not volatile. + if (SI->isVolatile()) + return 0; + // The store is only trivially safe if the poniter is the destination + // as opposed to the value. We're conservative here and don't check for + // the case where we store the address of a dead alloca into a dead + // alloca. + if (SI->getPointerOperand() != PI) + return 0; + DeadStores.push_back(I); + continue; + } + } + } + } while (!Worklist.empty()); + + // The alloca is dead. Kill off all the stores to it, and then replace it + // with undef. + while (!DeadStores.empty()) + IC.EraseInstFromFunction(*DeadStores.pop_back_val()); + return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); +} + Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. @@ -81,7 +147,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); } - return 0; + // Try to aggressively remove allocas which are only used for GEPs, lifetime + // markers, and stores. This happens when SROA iteratively promotes stores + // out of the alloca, and we need to cleanup after it. + return removeDeadAlloca(*this, AI); } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 7f48125..5168e2a 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -256,22 +256,18 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Simplify mul instructions with a constant RHS... + // Simplify mul instructions with a constant RHS. if (Constant *Op1C = dyn_cast<Constant>(Op1)) { if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' - } else if (Op1C->getType()->isVectorTy()) { - if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. 
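A quick standalone check of the x * 1.0 fold described above (plain C++, not part of the patch; assumes an IEEE-754 target with the default FP environment): sign of zero, infinities and denormals survive bit-for-bit, NaN payloads being the caveat the GCC quote alludes to.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint64_t bits(double D) {
  uint64_t B;
  std::memcpy(&B, &D, sizeof B);
  return B;
}

int main() {
  const double Vals[] = {0.0, -0.0, 1.5, -3.25, 5e-324, INFINITY, -INFINITY};
  for (double V : Vals)
    assert(bits(V * 1.0) == bits(V));   // multiplication by 1.0 is exact
  return 0;
}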
- if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } - } + } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) { + // As above, vector X*splat(1.0) -> X in all defined cases. + if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue())) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); } // Try to fold constant mul into select arguments. @@ -441,19 +437,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; - - if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) { + + { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. - if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2 + const APInt *C; + if (match(Op1, m_Power2(C))) { BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), + C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; } + } + if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) { // X udiv C, where C >= signbit if (C->getValue().isNegative()) { Value *IC = Builder->CreateICmpULT(Op0, C); @@ -684,28 +684,36 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } // If it's a constant vector, flip any negative values positive. - if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) { - unsigned VWidth = RHSV->getNumOperands(); + if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) { + Constant *C = cast<Constant>(Op1); + unsigned VWidth = C->getType()->getVectorNumElements(); bool hasNegative = false; - for (unsigned i = 0; !hasNegative && i != VWidth; ++i) - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) + bool hasMissing = false; + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) { + hasMissing = true; + break; + } + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt)) if (RHS->isNegative()) hasNegative = true; + } - if (hasNegative) { - std::vector<Constant *> Elts(VWidth); + if (hasNegative && !hasMissing) { + SmallVector<Constant *, 16> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { - if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { + Elts[i] = C->getAggregateElement(i); // Handle undef, etc. 
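The X udiv 2^C -> X >>u C rewrite earlier in this hunk is the usual power-of-two division identity for unsigned values; a brute-force check (plain C++, not part of the patch) over all 16-bit inputs:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X)
    for (unsigned C = 0; C < 16; ++C)
      assert(X / (1u << C) == (X >> C));   // divide by 2^C == shift right by C
  return 0;
}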
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) { if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); - else - Elts[i] = RHS; } } Constant *NewRHSV = ConstantVector::get(Elts); - if (NewRHSV != RHSV) { + if (NewRHSV != C) { // Don't loop on -MININT Worklist.AddValue(I.getOperand(1)); I.setOperand(1, NewRHSV); return &I; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 91e60a4..e727b2c 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -184,7 +184,6 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); } llvm_unreachable("Shouldn't get here"); - return 0; } static bool isSelect01(Constant *C1, Constant *C2) { @@ -282,7 +281,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Trivial replacement. if (V == Op) return RepOp; @@ -294,17 +294,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD); + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD); + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); } // Same for CmpInsts. if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD); + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + TLI); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD); + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + TLI); } // TODO: We could hand off more cases to instsimplify here. @@ -330,7 +332,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps, TD); + ConstOps, TD, TLI); } } @@ -479,18 +481,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. 
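A small illustration of the substitution rule described above (plain C++, not part of the patch): in s = (x == 0) ? (y - x) : y, plugging the known value x == 0 into the true arm simplifies it to y, the false arm, so the whole select is just y.

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X = -4; X <= 4; ++X)
    for (int32_t Y = -4; Y <= 4; ++Y) {
      int32_t Sel = (X == 0) ? (Y - X) : Y;
      assert(Sel == Y);                     // the select collapses to Y
    }
  return 0;
}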
if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, TrueVal); } @@ -679,6 +681,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::CreateOr(CondVal, FalseVal); else if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); + + // select a, ~a, b -> (~a)&b + // select a, b, ~a -> (~a)|b + if (match(TrueVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateAnd(TrueVal, FalseVal); + else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateOr(TrueVal, FalseVal); } // Selecting between two integer constants? diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6d85add..b31049e 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, V = IC.Builder->CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + V = ConstantFoldConstantExpression(CE, IC.getTargetData(), + IC.getTargetLibraryInfo()); return V; } @@ -198,7 +199,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, IC.Worklist.Add(I); switch (I->getOpcode()) { - default: assert(0 && "Inconsistency with CanEvaluateShifted"); + default: llvm_unreachable("Inconsistency with CanEvaluateShifted"); case Instruction::And: case Instruction::Or: case Instruction::Xor: @@ -535,12 +536,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. - IntegerType *Ty = cast<IntegerType>(I.getType()); // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { + uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. // If this is oversized composite shift, then unsigned shifts get 0, ashr // saturates. 
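For the composite-shift fold above, a standalone sketch (plain C++, not part of the patch; the helpers model IR shift semantics, since shifting by the bit width or more is undefined in C++): in-range shift amounts simply add, and once the sum reaches the width, logical shifts yield 0 while an arithmetic shift right saturates to the sign.

#include <cassert>
#include <cstdint>

static uint32_t shl (uint32_t X, unsigned C) { return C >= 32 ? 0 : X << C; }
static uint32_t lshr(uint32_t X, unsigned C) { return C >= 32 ? 0 : X >> C; }
static int32_t  ashr(int32_t  X, unsigned C) {
  // >> on a negative int32_t is arithmetic on common targets.
  return C >= 32 ? (X < 0 ? -1 : 0) : (X >> C);
}

int main() {
  uint32_t X = 0xDEADBEEF;
  assert(shl(shl(X, 3), 7)   == shl(X, 10));    // shl of shl folds to one shift
  assert(lshr(lshr(X, 5), 9) == lshr(X, 14));   // lshr of lshr likewise
  assert(shl(shl(X, 20), 15) == 0);             // oversized sum: unsigned shifts give 0
  assert(ashr(ashr(INT32_MIN, 20), 15) == -1);  // oversized ashr: saturates to the sign
  return 0;
}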
if (AmtSum >= TypeBits) { @@ -576,7 +576,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ShiftOp->getOpcode() != Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->isExact()) { + // (X >>?,exact C1) << C2 --> X << (C2-C1) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, @@ -586,15 +595,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { - assert(ShiftOp->getOpcode() == Instruction::Shl); - Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + // (X <<nuw C1) >>u C2 --> X >>u (C2-C1) + if (ShiftOp->hasNoUnsignedWrap()) { + BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr, + X, ShiftDiffCst); + NewLShr->setIsExact(I.isExact()); + return NewLShr; + } + Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - - // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. + + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X >>s (C2-C1) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr, + X, ShiftDiffCst); + NewAShr->setIsExact(I.isExact()); + return NewAShr; + } + } } else { assert(ShiftAmt2 < ShiftAmt1); uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; @@ -602,9 +630,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X >>? C1) << C2 --> X >>? 
(C1-C2) & (-1 << C2) if (I.getOpcode() == Instruction::Shl && ShiftOp->getOpcode() != Instruction::Shl) { - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, - ConstantInt::get(Ty, ShiftDiff)); - + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->isExact()) { + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; + } + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), + X, ShiftDiffCst); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); @@ -613,14 +648,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { - Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + if (ShiftOp->hasNoUnsignedWrap()) { + // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2) + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(true); + return NewShl; + } + Value *Shift = Builder->CreateShl(X, ShiftDiffCst); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, + // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. + if (I.getOpcode() == Instruction::AShr && + ShiftOp->getOpcode() == Instruction::Shl) { + if (ShiftOp->hasNoSignedWrap()) { + // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2) + ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoSignedWrap(true); + return NewShl; + } + } } } return 0; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 5cd9a4b..125c74a 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -142,7 +142,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); return 0; // Only analyze instructions. } @@ -156,10 +156,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known 1 on one side, return the other. 
// These bits cannot contribute to the result of the 'and' in this @@ -180,10 +178,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -206,7 +202,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); return 0; } @@ -219,7 +215,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -567,9 +563,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); + + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. + if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) { + APInt I0 = C0->getValue(); + if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) { + Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0); + return InsertNewInstWith(Xor, *I); + } + } break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -671,8 +678,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. - Instruction *NewVal = BinaryOperator::CreateLShr( - I->getOperand(0), SA, I->getName()); + BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), + SA, I->getName()); + NewVal->setIsExact(cast<BinaryOperator>(I)->isExact()); return InsertNewInstWith(NewVal, *I); } else if ((KnownOne & SignBit) != 0) { // New bits are known one. KnownOne |= HighBits; @@ -717,10 +725,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { - APInt Mask2 = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, - Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If it's known zero, our sign bit is also zero. 
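The sub-to-xor rewrite added earlier in this SimplifyDemandedUseBits hunk rests on a borrow-free identity: if C == 2^n - 1 and X has no bits outside those low n bits, then C - X == C ^ X. A brute-force check (plain C++, not part of the patch) for n = 6:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = (1u << 6) - 1;     // 0b111111
  for (uint32_t X = 0; X <= C; ++X)     // every X whose set bits lie inside C
    assert(C - X == (C ^ X));           // no borrows, so subtract equals xor
  return 0;
}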
if (LHSKnownZero.isNegative()) KnownZero |= LHSKnownZero; @@ -783,7 +789,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; } } - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); break; } @@ -822,46 +828,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } UndefElts = 0; - if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) { + + // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential. + if (Constant *C = dyn_cast<Constant>(V)) { + // Check if this is identity. If so, return 0 since we are not simplifying + // anything. + if (DemandedElts.isAllOnesValue()) + return 0; + Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); - - std::vector<Constant*> Elts; - for (unsigned i = 0; i != VWidth; ++i) + + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to undef. Elts.push_back(Undef); UndefElts.setBit(i); - } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef. + continue; + } + + Constant *Elt = C->getAggregateElement(i); + if (Elt == 0) return 0; + + if (isa<UndefValue>(Elt)) { // Already undef. Elts.push_back(Undef); UndefElts.setBit(i); } else { // Otherwise, defined. - Elts.push_back(CV->getOperand(i)); + Elts.push_back(Elt); } - - // If we changed the constant, return it. - Constant *NewCP = ConstantVector::get(Elts); - return NewCP != CV ? NewCP : 0; - } - - if (isa<ConstantAggregateZero>(V)) { - // Simplify the CAZ to a ConstantVector where the non-demanded elements are - // set to undef. - - // Check if this is identity. If so, return 0 since we are not simplifying - // anything. - if (DemandedElts.isAllOnesValue()) - return 0; - - Type *EltTy = cast<VectorType>(V->getType())->getElementType(); - Constant *Zero = Constant::getNullValue(EltTy); - Constant *Undef = UndefValue::get(EltTy); - std::vector<Constant*> Elts; - for (unsigned i = 0; i != VWidth; ++i) { - Constant *Elt = DemandedElts[i] ? Zero : Undef; - Elts.push_back(Elt); } - UndefElts = DemandedElts ^ EltMask; - return ConstantVector::get(Elts); + + // If we changed the constant, return it. + Constant *NewCV = ConstantVector::get(Elts); + return NewCV != C ? NewCV : 0; } // Limit search depth. @@ -977,7 +976,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (NewUndefElts) { // Add additional discovered undefs. - std::vector<Constant*> Elts; + SmallVector<Constant*, 16> Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext()))); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 154267c..cf60f0f 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -16,16 +16,16 @@ using namespace llvm; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. +/// is to leave as a vector operation. isConstant indicates whether we're +/// extracting one known element. If false we're extracting a variable index. 
static bool CheapToScalarize(Value *V, bool isConstant) { - if (isa<ConstantAggregateZero>(V)) - return true; - if (ConstantVector *C = dyn_cast<ConstantVector>(V)) { + if (Constant *C = dyn_cast<Constant>(V)) { if (isConstant) return true; - // If all elts are the same, we can extract. - Constant *Op0 = C->getOperand(0); - for (unsigned i = 1; i < C->getNumOperands(); ++i) - if (C->getOperand(i) != Op0) + + // If all elts are the same, we can extract it and use any of the values. + Constant *Op0 = C->getAggregateElement(0U); + for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i) + if (C->getAggregateElement(i) != Op0) return false; return true; } @@ -53,41 +53,18 @@ static bool CheapToScalarize(Value *V, bool isConstant) { return false; } -/// getShuffleMask - Read and decode a shufflevector mask. -/// Turn undef elements into negative values. -static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { - unsigned NElts = SVI->getType()->getNumElements(); - if (isa<ConstantAggregateZero>(SVI->getOperand(2))) - return std::vector<int>(NElts, 0); - if (isa<UndefValue>(SVI->getOperand(2))) - return std::vector<int>(NElts, -1); - - std::vector<int> Result; - const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); - for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) - if (isa<UndefValue>(*i)) - Result.push_back(-1); // undef - else - Result.push_back(cast<ConstantInt>(*i)->getZExtValue()); - return Result; -} - /// FindScalarElement - Given a vector and an element number, see if the scalar /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - VectorType *PTy = cast<VectorType>(V->getType()); - unsigned Width = PTy->getNumElements(); + VectorType *VTy = cast<VectorType>(V->getType()); + unsigned Width = VTy->getNumElements(); if (EltNo >= Width) // Out of range access. - return UndefValue::get(PTy->getElementType()); + return UndefValue::get(VTy->getElementType()); - if (isa<UndefValue>(V)) - return UndefValue::get(PTy->getElementType()); - if (isa<ConstantAggregateZero>(V)) - return Constant::getNullValue(PTy->getElementType()); - if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) - return CP->getOperand(EltNo); + if (Constant *C = dyn_cast<Constant>(V)) + return C->getAggregateElement(EltNo); if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { // If this is an insert to a variable element, we don't know what it is. @@ -106,11 +83,10 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { - unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); - int InEl = getShuffleMask(SVI)[EltNo]; + unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); + int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) - return UndefValue::get(PTy->getElementType()); + return UndefValue::get(VTy->getElementType()); if (InEl < (int)LHSWidth) return FindScalarElement(SVI->getOperand(0), InEl); return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); @@ -121,27 +97,11 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - // If vector val is undef, replace extract with scalar undef. 
- if (isa<UndefValue>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); - - // If vector val is constant 0, replace extract with scalar 0. - if (isa<ConstantAggregateZero>(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); - - if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) { - // If vector val is constant with all elements the same, replace EI with - // that element. When the elements are not identical, we cannot replace yet - // (we do that below, but only when the index is constant). - Constant *op0 = C->getOperand(0); - for (unsigned i = 1; i != C->getNumOperands(); ++i) - if (C->getOperand(i) != op0) { - op0 = 0; - break; - } - if (op0) - return ReplaceInstUsesWith(EI, op0); - } + // If vector val is constant with all elements the same, replace EI with + // that element. We handle a known element # below. + if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) + if (CheapToScalarize(C, false)) + return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); // If extracting a specified index from the vector, see if we can recursively // find a previously computed scalar that was inserted into the vector. @@ -175,8 +135,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { - if (VectorType *VT = - dyn_cast<VectorType>(BCI->getOperand(0)->getType())) + if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); @@ -212,10 +171,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { - int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); Value *Src; unsigned LHSWidth = - cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements(); + SVI->getOperand(0)->getType()->getVectorNumElements(); if (SrcIdx < 0) return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); @@ -248,7 +207,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { /// elements from either LHS or RHS, return the shuffle mask and true. /// Otherwise, return false. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, - std::vector<Constant*> &Mask) { + SmallVectorImpl<Constant*> &Mask) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); @@ -325,7 +284,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// CollectShuffleElements - We are building a shuffle of V, using RHS as the /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask /// that computes V and the LHS value of the shuffle. 
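The extract-through-shuffle rule used in this hunk, sketched on plain arrays (C++, not part of the patch): lane i of shufflevector(A, B, Mask) is A[Mask[i]] when Mask[i] < n and B[Mask[i] - n] otherwise, so an extractelement of the shuffle can be redirected at the appropriate source vector.

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> A{10, 11, 12, 13}, B{20, 21, 22, 23};
  std::array<int, 4> Mask{3, 0, 5, 6};          // lanes: A[3], A[0], B[1], B[2]
  std::array<int, 4> Shuf{};
  for (int I = 0; I != 4; ++I)
    Shuf[I] = Mask[I] < 4 ? A[Mask[I]] : B[Mask[I] - 4];
  // "extractelement %Shuf, 2" can be rewritten as "extractelement %B, 1".
  assert(Shuf[2] == B[1]);
  return 0;
}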
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, +static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, Value *&RHS) { assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && @@ -335,10 +294,14 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, if (isa<UndefValue>(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; - } else if (isa<ConstantAggregateZero>(V)) { + } + + if (isa<ConstantAggregateZero>(V)) { Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); return V; - } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { + } + + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); Value *ScalarOp = IEI->getOperand(1); @@ -421,7 +384,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) { - std::vector<Constant*> Mask; + SmallVector<Constant*, 16> Mask; Value *RHS = 0; Value *LHS = CollectShuffleElements(&IE, Mask, RHS); if (RHS == 0) RHS = UndefValue::get(LHS->getType()); @@ -447,7 +410,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - std::vector<int> Mask = getShuffleMask(&SVI); + SmallVector<int, 16> Mask = SVI.getShuffleMask(); bool MadeChange = false; @@ -457,9 +420,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); - if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) - return 0; - APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { @@ -470,29 +430,34 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { MadeChange = true; } + unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). if (LHS == RHS || isa<UndefValue>(LHS)) { if (isa<UndefValue>(LHS) && LHS == RHS) { // shuffle(undef,undef,mask) -> undef. - return ReplaceInstUsesWith(SVI, LHS); + Value* result = (VWidth == LHSWidth) + ? LHS : UndefValue::get(SVI.getType()); + return ReplaceInstUsesWith(SVI, result); } // Remap any references to RHS to use LHS. - std::vector<Constant*> Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) + SmallVector<Constant*, 16> Elts; + for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { + if (Mask[i] < 0) { Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - else { - if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || - (Mask[i] < (int)e && isa<UndefValue>(LHS))) { - Mask[i] = -1; // Turn into undef. - Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); - } else { - Mask[i] = Mask[i] % e; // Force to LHS. 
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), - Mask[i])); - } + continue; + } + + if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || + (Mask[i] < (int)e && isa<UndefValue>(LHS))) { + Mask[i] = -1; // Turn into undef. + Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); + } else { + Mask[i] = Mask[i] % e; // Force to LHS. + Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()), + Mask[i])); } } SVI.setOperand(0, SVI.getOperand(1)); @@ -503,72 +468,204 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { MadeChange = true; } - // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; + if (VWidth == LHSWidth) { + // Analyze the shuffle, are the LHS or RHS and identity shuffles? + bool isLHSID = true, isRHSID = true; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } - // Eliminate identity shuffles. - if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); - if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); + // Eliminate identity shuffles. + if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); + } // If the LHS is a shufflevector itself, see if we can combine it with this - // one without producing an unusual shuffle. Here we are really conservative: + // one without producing an unusual shuffle. + // Cases that might be simplified: + // 1. + // x1=shuffle(v1,v2,mask1) + // x=shuffle(x1,undef,mask) + // ==> + // x=shuffle(v1,undef,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 + // 2. + // x1=shuffle(v1,undef,mask1) + // x=shuffle(x1,x2,mask) + // where v1.size() == mask1.size() + // ==> + // x=shuffle(v1,x2,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] + // 3. + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v2.size() == mask2.size() + // ==> + // x=shuffle(x1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() + // 4. + // x1=shuffle(v1,undef,mask1) + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v1.size() == v2.size() + // ==> + // x=shuffle(v1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() + // + // Here we are really conservative: // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, // we only merge two shuffles if the result is either a splat or one of the - // two input shuffle masks. In this case, merging the shuffles just removes + // input shuffle masks. In this case, merging the shuffles just removes // one instruction, which we know is safe. This is good for things like - // turning: (splat(splat)) -> splat. 
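Case 1 from the comment above, modeled with std::vector lanes (plain C++, not part of the patch; -1 plays the role of an undef mask element): x1 = shuffle(v1, undef, m1) followed by x = shuffle(x1, undef, m) collapses to a single shuffle of v1 with newMask[i] = m[i] < 0 ? -1 : m1[m[i]], splat-of-splat being the classic instance.

#include <cassert>
#include <vector>

static std::vector<int> shuffle(const std::vector<int> &V,
                                const std::vector<int> &Mask) {
  std::vector<int> R;
  for (int M : Mask)
    R.push_back(M < 0 ? -1 : V[M]);   // -1 stands in for an undef lane
  return R;
}

int main() {
  std::vector<int> V1{7, 8, 9, 10};
  std::vector<int> M1{2, 2, 2, 2};    // x1: splat lane 2 of V1
  std::vector<int> M {1, 1, 1, 1};    // x:  splat lane 1 of x1
  std::vector<int> X = shuffle(shuffle(V1, M1), M);

  std::vector<int> NewMask{M1[M[0]], M1[M[1]], M1[M[2]], M1[M[3]]};
  assert(X == shuffle(V1, NewMask));  // one shuffle straight from V1
  return 0;
}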
- if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) { + // turning: (splat(splat)) -> splat, or + // merge(V[0..n], V[n+1..2n]) -> V[0..2n] + ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); + ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); + if (LHSShuffle) + if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) + LHSShuffle = NULL; + if (RHSShuffle) + if (!isa<UndefValue>(RHSShuffle->getOperand(1))) + RHSShuffle = NULL; + if (!LHSShuffle && !RHSShuffle) + return MadeChange ? &SVI : 0; + + Value* LHSOp0 = NULL; + Value* LHSOp1 = NULL; + Value* RHSOp0 = NULL; + unsigned LHSOp0Width = 0; + unsigned RHSOp0Width = 0; + if (LHSShuffle) { + LHSOp0 = LHSShuffle->getOperand(0); + LHSOp1 = LHSShuffle->getOperand(1); + LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); + } + if (RHSShuffle) { + RHSOp0 = RHSShuffle->getOperand(0); + RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); + } + Value* newLHS = LHS; + Value* newRHS = RHS; + if (LHSShuffle) { + // case 1 if (isa<UndefValue>(RHS)) { - std::vector<int> LHSMask = getShuffleMask(LHSSVI); - - if (LHSMask.size() == Mask.size()) { - std::vector<int> NewMask; - bool isSplat = true; - int SplatElt = -1; // undef - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - int MaskElt; - if (Mask[i] < 0 || Mask[i] >= (int)e) - MaskElt = -1; // undef - else - MaskElt = LHSMask[Mask[i]]; - // Check if this could still be a splat. - if (MaskElt >= 0) { - if (SplatElt >=0 && SplatElt != MaskElt) - isSplat = false; - SplatElt = MaskElt; - } - NewMask.push_back(MaskElt); - } + newLHS = LHSOp0; + newRHS = LHSOp1; + } + // case 2 or 4 + else if (LHSOp0Width == LHSWidth) { + newLHS = LHSOp0; + } + } + // case 3 or 4 + if (RHSShuffle && RHSOp0Width == LHSWidth) { + newRHS = RHSOp0; + } + // case 4 + if (LHSOp0 == RHSOp0) { + newLHS = LHSOp0; + newRHS = NULL; + } - // If the result mask is equal to the src shuffle or this - // shuffle mask, do the replacement. - if (isSplat || NewMask == LHSMask || NewMask == Mask) { - std::vector<Constant*> Elts; - Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] < 0) { - Elts.push_back(UndefValue::get(Int32Ty)); - } else { - Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); - } - } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); + if (newLHS == LHS && newRHS == RHS) + return MadeChange ? &SVI : 0; + + SmallVector<int, 16> LHSMask; + SmallVector<int, 16> RHSMask; + if (newLHS != LHS) + LHSMask = LHSShuffle->getShuffleMask(); + if (RHSShuffle && newRHS != RHS) + RHSMask = RHSShuffle->getShuffleMask(); + + unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; + SmallVector<int, 16> newMask; + bool isSplat = true; + int SplatElt = -1; + // Create a new mask for the new ShuffleVectorInst so that the new + // ShuffleVectorInst is equivalent to the original one. + for (unsigned i = 0; i < VWidth; ++i) { + int eltMask; + if (Mask[i] == -1) { + // This element is an undef value. + eltMask = -1; + } else if (Mask[i] < (int)LHSWidth) { + // This element is from left hand side vector operand. + // + // If LHS is going to be replaced (case 1, 2, or 4), calculate the + // new mask value for the element. + if (newLHS != LHS) { + eltMask = LHSMask[Mask[i]]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. 
+ if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) + eltMask = -1; + } + else + eltMask = Mask[i]; + } else { + // This element is from right hand side vector operand + // + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. (case 1) + if (isa<UndefValue>(RHS)) + eltMask = -1; + // If RHS is going to be replaced (case 3 or 4), calculate the + // new mask value for the element. + else if (newRHS != RHS) { + eltMask = RHSMask[Mask[i]-LHSWidth]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. + if (eltMask >= (int)RHSOp0Width) { + assert(isa<UndefValue>(RHSShuffle->getOperand(1)) + && "should have been check above"); + eltMask = -1; } } + else + eltMask = Mask[i]-LHSWidth; + + // If LHS's width is changed, shift the mask value accordingly. + // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any + // references to RHSOp0 to LHSOp0, so we don't need to shift the mask. + if (eltMask >= 0 && newRHS != NULL) + eltMask += newLHSWidth; + } + + // Check if this could still be a splat. + if (eltMask >= 0) { + if (SplatElt >= 0 && SplatElt != eltMask) + isSplat = false; + SplatElt = eltMask; + } + + newMask.push_back(eltMask); + } + + // If the result mask is equal to one of the original shuffle masks, + // or is a splat, do the replacement. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { + SmallVector<Constant*, 16> Elts; + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + for (unsigned i = 0, e = newMask.size(); i != e; ++i) { + if (newMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); + } } + if (newRHS == NULL) + newRHS = UndefValue::get(newLHS->getType()); + return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); } return MadeChange ? 
&SVI : 0; diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 32009c3..99a02fc 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -55,9 +55,9 @@ public: Worklist.reserve(NumEntries+16); WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (; NumEntries; --NumEntries) { + for (unsigned Idx = 0; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; - WorklistMap.insert(std::make_pair(I, Worklist.size())); + WorklistMap.insert(std::make_pair(I, Idx++)); Worklist.push_back(I); } } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index c15b805..066b2ec 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { } char InstCombiner::ID = 0; -INITIALIZE_PASS(InstCombiner, "instcombine", +INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine", + "Combine redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } @@ -490,7 +495,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { if (ConstantInt *C = dyn_cast<ConstantInt>(V)) return ConstantExpr::getNeg(C); - if (ConstantVector *C = dyn_cast<ConstantVector>(V)) + if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V)) if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); @@ -509,7 +514,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { if (ConstantFP *C = dyn_cast<ConstantFP>(V)) return ConstantExpr::getFNeg(C); - if (ConstantVector *C = dyn_cast<ConstantVector>(V)) + if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V)) if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); @@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { MadeChange = true; } - if ((*I)->getType() != IntPtrTy) { + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -909,7 +915,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); + + // We do not handle pointer-vector geps here. 
+ if (!StrippedPtrTy) + return 0; + if (StrippedPtr != PtrOp && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -1235,15 +1246,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - unsigned NumCases = SI.getNumCases(); // Skip the first item since that's the default case. - for (unsigned i = 1; i < NumCases; ++i) { - ConstantInt* CaseVal = SI.getCaseValue(i); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), AddRHS); assert(isa<ConstantInt>(NewCaseVal) && "Result of expression should be constant"); - SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal)); + i.setValue(cast<ConstantInt>(NewCaseVal)); } SI.setCondition(I->getOperand(0)); Worklist.Add(I); @@ -1260,24 +1271,16 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return ReplaceInstUsesWith(EV, Agg); if (Constant *C = dyn_cast<Constant>(Agg)) { - if (isa<UndefValue>(C)) - return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); - - if (isa<ConstantAggregateZero>(C)) - return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); - - if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) { - // Extract the element indexed by the first index out of the constant - Value *V = C->getOperand(*EV.idx_begin()); - if (EV.getNumIndices() > 1) - // Extract the remaining indices out of the constant indexed by the - // first index - return ExtractValueInst::Create(V, EV.getIndices().slice(1)); - else - return ReplaceInstUsesWith(EV, V); + if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) { + if (EV.getNumIndices() == 0) + return ReplaceInstUsesWith(EV, C2); + // Extract the remaining indices out of the constant indexed by the + // first index + return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); } return 0; // Can't handle other constants - } + } + if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; @@ -1414,7 +1417,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { enum Personality_Type { Unknown_Personality, GNU_Ada_Personality, - GNU_CXX_Personality + GNU_CXX_Personality, + GNU_ObjC_Personality }; /// RecognizePersonality - See if the given exception handling personality @@ -1426,7 +1430,8 @@ static Personality_Type RecognizePersonality(Value *Pers) { return Unknown_Personality; return StringSwitch<Personality_Type>(F->getName()) .Case("__gnat_eh_personality", GNU_Ada_Personality) - .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Case("__gxx_personality_v0", GNU_CXX_Personality) + .Case("__objc_personality_v0", GNU_ObjC_Personality) .Default(Unknown_Personality); } @@ -1440,6 +1445,7 @@ static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) { // match foreign exceptions (or didn't, before gcc-4.7). 
return false; case GNU_CXX_Personality: + case GNU_ObjC_Personality: return TypeInfo->isNullValue(); } llvm_unreachable("Unknown personality!"); @@ -1795,7 +1801,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet<BasicBlock*, 64> &Visited, InstCombiner &IC, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); @@ -1822,7 +1829,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); @@ -1840,7 +1847,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, TD); + FoldRes = ConstantFoldConstantExpression(CE, TD, TLI); if (!FoldRes) FoldRes = CE; @@ -1867,15 +1874,16 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { // See if this is an explicit destination. - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) - if (SI->getCaseValue(i) == Cond) { - BasicBlock *ReachableBB = SI->getSuccessor(i); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseValue() == Cond) { + BasicBlock *ReachableBB = i.getCaseSuccessor(); Worklist.push_back(ReachableBB); continue; } // Otherwise it is the default destination. - Worklist.push_back(SI->getSuccessor(0)); + Worklist.push_back(SI->getDefaultDest()); continue; } } @@ -1899,14 +1907,15 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " - << F.getNameStr() << "\n"); + << F.getName() << "\n"); { // Do a depth-first traversal of the function, populate the worklist with // the reachable instructions. Ignore blocks that are not reachable. Keep // track of which blocks we visit. SmallPtrSet<BasicBlock*, 64> Visited; - MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); + MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD, + TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents @@ -1951,7 +1960,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, TD)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. @@ -2059,7 +2068,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); - + TLI = &getAnalysis<TargetLibraryInfo>(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. 
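The visitSwitchInst rewrite above ('switch (X+4) case 1' becomes 'switch (X) case -3') relies on the wrapping identity X + K == CaseVal exactly when X == CaseVal - K; a quick check (plain C++, not part of the patch), including the wraparound case:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 64; ++X) {
    bool Original  = (X + 4u) == 1u;   // switch (X + 4) ... case 1
    bool Rewritten = X == (1u - 4u);   // switch (X)     ... case -3
    assert(Original == Rewritten);
  }
  uint32_t X = uint32_t(0) - 3u;       // X == (uint32_t)-3 hits the case both ways
  assert((X + 4u) == 1u && X == (1u - 4u));
  return 0;
}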
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt new file mode 100644 index 0000000..62c61616 --- /dev/null +++ b/lib/Transforms/InstCombine/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Transforms/InstCombine/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = InstCombine +parent = Transforms +required_libraries = Analysis Core Support Target TransformUtils |