Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine')
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp           | 119
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp         |  53
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp            | 392
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp            | 264
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp         | 233
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h (renamed from contrib/llvm/lib/Transforms/InstCombine/InstCombine.h) | 305
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp  | 576
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp        |  46
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp              |  17
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp           | 413
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp           |  31
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 274
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp        | 129
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h           | 107
-rw-r--r--  contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp        | 834
15 files changed, 2218 insertions(+), 1575 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6d20384..a8d0172 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
  // Addition of two 2's complement numbers having opposite signs will never
// overflow.
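
The sign-bit heuristic above can be checked exhaustively at a narrow width. A standalone C++ sketch, not part of this diff (the choice of int8_t is illustrative): an 8-bit value with at least two sign bits lies in [-64, 63], and the sum of two such values always fits in int8_t.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Two or more sign bits in 8 bits means the value is in [-64, 63].
      for (int a = -64; a <= 63; ++a)
        for (int b = -64; b <= 63; ++b) {
          int Sum = a + b;                            // computed at full width
          assert(Sum >= INT8_MIN && Sum <= INT8_MAX); // never overflows i8
        }
      return 0;
    }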
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
  // Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
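
The same style of argument applies here through the unsigned lens: a "negative" LHS has its top bit set, so as an unsigned quantity it is at least 2^(w-1), which exceeds any RHS whose top bit is clear. A standalone C++ check at 8 bits (illustrative only, not part of the diff):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned LHS = 128; LHS <= 255; ++LHS)    // sign bit known set
        for (unsigned RHS = 0; RHS <= 127; ++RHS)    // sign bit known clear
          assert(LHS - RHS == (uint8_t)(LHS - RHS)); // no unsigned wrap in i8
      return 0;
    }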
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1158,20 +1160,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
// A+B --> A|B iff A and B have no bits set in common.
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &I);
- if (LHSKnownZero != 0) {
- APInt RHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &I);
-
- // No bits in common -> bitwise or.
- if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
- return BinaryOperator::CreateOr(LHS, RHS);
- }
- }
+ if (haveNoCommonBitsSet(LHS, RHS, DL, AC, &I, DT))
+ return BinaryOperator::CreateOr(LHS, RHS);
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Value *X;
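
The identity behind the new haveNoCommonBitsSet fold: when no bit position is set in both operands, the addition produces no carries, so it agrees with bitwise or. A standalone exhaustive check over 8-bit values (illustrative, not part of the diff):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t B = 0; B < 256; ++B)
          if ((A & B) == 0)            // no bits in common -> no carries
            assert(A + B == (A | B));
      return 0;
    }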
@@ -1243,7 +1233,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1246,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
      // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1298,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1375,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
      // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1428,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1584,6 +1574,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
return BinaryOperator::CreateLShr(X, CI);
}
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
+ APInt IntVal = C->getValue();
+ if ((IntVal + 1).isPowerOf2()) {
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
+ if ((IntVal | KnownZero).isAllOnesValue()) {
+ return BinaryOperator::CreateXor(Op1, C);
+ }
+ }
}
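
The fold added above rests on a borrow-free subtraction: if C is 2^n - 1 and every set bit of the subtrahend lies inside C, then C - X flips exactly the bits of X under the mask, which is C ^ X. A standalone C++ check (illustrative, not part of the diff):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 0x7f;           // 2^7 - 1
      for (uint32_t X = 0; X <= C; ++X)  // remaining bits of X are zero
        assert(C - X == (C ^ X));        // no borrows, so sub == xor
      return 0;
    }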
@@ -1662,26 +1665,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (DL) {
- Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
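
For reference, the pattern OptimizePointerDifference targets looks like this at the C level. A standalone sketch (illustrative); per the comment above, the difference of two GEPs off the same base is expected to fold to a constant:

    #include <cassert>

    int main() {
      int A[16];
      assert(&A[10] - &A[0] == 10); // expected to compile down to 10
      return 0;
    }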
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 74b6970..ee21c81 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Intrinsics.h"
@@ -22,30 +22,12 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// isFreeToInvert - Return true if the specified value is free to invert (apply
-/// ~ to). This happens in cases where the ~ can be eliminated.
-static inline bool isFreeToInvert(Value *V) {
- // ~(~(X)) -> X.
- if (BinaryOperator::isNot(V))
- return true;
-
- // Constants can be considered to be not'ed values.
- if (isa<ConstantInt>(V))
- return true;
-
- // Compares can be inverted if they have a single use.
- if (CmpInst *CI = dyn_cast<CmpInst>(V))
- return CI->hasOneUse();
-
- return false;
-}
-
static inline Value *dyn_castNotVal(Value *V) {
// If this is not(not(x)) don't return that this is a not: we want the two
// not's to be folded first.
if (BinaryOperator::isNot(V)) {
Value *Operand = BinaryOperator::getNotArgument(V);
- if (!isFreeToInvert(Operand))
+ if (!IsFreeToInvert(Operand, Operand->hasOneUse()))
return Operand;
}
@@ -997,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
ConstantRange LHSRange =
- ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
ConstantRange RHSRange =
- ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
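
The effect of intersecting the two allowed regions, sketched in standalone C++ (illustrative): the region for x < 3 is [INT_MIN, 3) and the region for x > 5 is [6, INT_MAX], so their intersection is empty and the conjunction is always false.

    #include <cassert>

    static bool andOfICmps(int X) { return X < 3 && X > 5; }

    int main() {
      for (int X = -100; X <= 100; ++X)
        assert(!andOfICmps(X)); // empty range intersection => always false
      return 0;
    }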
@@ -1727,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -2585,8 +2569,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// ~(X & Y) --> (~X | ~Y) - De Morgan's Law
// ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (isFreeToInvert(Op0I->getOperand(0)) &&
- isFreeToInvert(Op0I->getOperand(1))) {
+ if (IsFreeToInvert(Op0I->getOperand(0),
+ Op0I->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(Op0I->getOperand(1),
+ Op0I->getOperand(1)->hasOneUse())) {
Value *NotX =
Builder->CreateNot(Op0I->getOperand(0), "notlhs");
Value *NotY =
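
A standalone check of the De Morgan rewrites this block applies (illustrative; the transform itself only fires when both operands are free to invert):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        for (uint32_t Y = 0; Y < 256; ++Y) {
          assert(~(X & Y) == (~X | ~Y)); // De Morgan's Law
          assert(~(X | Y) == (~X & ~Y)); // De Morgan's Law
        }
      return 0;
    }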
@@ -2604,15 +2590,16 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- if (RHS->isOne() && Op0->hasOneUse())
+ if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ if (RHS->isAllOnesValue() && Op0->hasOneUse())
// xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
return CmpInst::Create(CI->getOpcode(),
CI->getInversePredicate(),
CI->getOperand(0), CI->getOperand(1));
+ }
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
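
The xor-with-true fold above corresponds to predicate inversion. A standalone C++ illustration (not part of the diff):

    #include <cassert>

    int main() {
      for (int A = -4; A <= 4; ++A)
        for (int B = -4; B <= 4; ++B)
          assert(((A < B) ^ true) == (A >= B)); // xor(cmp, true) == inverse cmp
      return 0;
    }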
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 83b4b82..e83b9dd 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -11,16 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace PatternMatch;
@@ -60,8 +61,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -107,7 +108,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -155,7 +156,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -197,11 +198,137 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
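
For readers decoding the control byte by hand, here is a standalone sketch that unpacks the three insertps fields exactly as the comment above lays them out (struct and function names are illustrative only):

    #include <cassert>
    #include <cstdint>

    struct InsertPSImm {
      uint8_t ZMask;      // [3:0] zero mask, one bit per 32-bit lane
      uint8_t DestLane;   // [5:4] destination lane
      uint8_t SourceLane; // [7:6] source lane
    };

    static InsertPSImm decodeInsertPS(uint8_t Imm) {
      return {static_cast<uint8_t>(Imm & 0xf),
              static_cast<uint8_t>((Imm >> 4) & 0x3),
              static_cast<uint8_t>((Imm >> 6) & 0x3)};
    }

    int main() {
      InsertPSImm D = decodeInsertPS(0xf); // all zero-mask bits set
      assert(D.ZMask == 0xf && D.DestLane == 0 && D.SourceLane == 0);
      return 0;
    }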
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
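
A scalar model of the vperm2*128 semantics that SimplifyX86vperm2 rewrites into a shufflevector, as a standalone sketch (names are illustrative; each half of the result takes one 128-bit half of either source, or is zeroed):

    #include <cassert>
    #include <cstdint>

    static void vperm2f128(const float *V0, const float *V1, uint8_t Imm,
                           float *Out) {
      for (int Half = 0; Half < 2; ++Half) {
        uint8_t Sel = (Imm >> (4 * Half)) & 0xf;
        const float *Src = (Sel & 0x2) ? V1 : V0; // bit 1: pick the operand
        int Base = (Sel & 0x1) ? 4 : 0;           // bit 0: low or high half
        for (int i = 0; i < 4; ++i)
          Out[Half * 4 + i] = (Sel & 0x8) ? 0.0f  // bit 3: zero this half
                                          : Src[Base + i];
      }
    }

    int main() {
      float A[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      float B[8] = {8, 9, 10, 11, 12, 13, 14, 15};
      float R[8];
      vperm2f128(A, B, 0x21, R); // low: high half of A; high: low half of B
      assert(R[0] == 4 && R[4] == 8);
      return 0;
    }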
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return ReplaceInstUsesWith(CI, V);
+
if (isFreeCall(&CI, TLI))
return visitFree(CI);
@@ -350,112 +477,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true);
- }
- // FALL THROUGH uadd into sadd
+
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
-    // We can strength reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
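
The with.overflow intrinsics unified above return a {result, overflow} pair; OptimizeOverflowCheck strength-reduces them when the flag is provably constant. The closest C-level analogue is the GCC/Clang builtin shown in this standalone sketch (illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Sum;
      // 200 + 100 wraps in 8 bits: flag set, wrapped result produced.
      bool Ov = __builtin_add_overflow((uint8_t)200, (uint8_t)100, &Sum);
      assert(Ov && Sum == (uint8_t)(200 + 100));
      // 1 + 2 provably cannot overflow: the checked add can become a plain
      // add plus a constant-false flag, which is what the combine emits.
      Ov = __builtin_add_overflow((uint8_t)1, (uint8_t)2, &Sum);
      assert(!Ov && Sum == 3);
      return 0;
    }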
@@ -543,7 +594,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -560,7 +611,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -575,11 +626,54 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -696,15 +790,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
break;
}
-
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
@@ -867,6 +964,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -906,12 +1011,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -940,7 +1045,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1079,7 +1184,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1102,7 +1207,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// facts about the relocate value, while being careful to
// preserve relocation semantics.
GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.derivedPtr();
+ Value *DerivedPtr = Operands.getDerivedPtr();
+ auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
@@ -1113,24 +1219,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
// most practical collectors, but there was discussion in the review thread
// about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<UndefValue>(DerivedPtr)) {
+ // gc_relocate is uncasted. Use undef of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
+ }
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some weird
// collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // gc_relocate is uncasted. Use null-pointer of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
+ }
// isKnownNonNull -> nonnull attribute
if (isKnownNonNull(DerivedPtr))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- // TODO: dereferenceable -> deref attribute
+ // isDereferenceablePointer -> deref attribute
+ if (isDereferenceablePointer(DerivedPtr, DL)) {
+ if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+ uint64_t Bytes = A->getDereferenceableBytes();
+ II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+ }
+ }
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
-
+
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
}
}
@@ -1147,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1172,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1181,10 +1297,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
- if (Value *With = Simplifier->optimizeCall(CI)) {
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ ReplaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
@@ -1342,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1359,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1409,10 +1529,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
- if (RAttrs.
- hasAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1438,7 +1555,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1450,8 +1570,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- hasAttributes(AttributeFuncs::
- typeIncompatible(ParamTy, i + 1), i + 1))
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
@@ -1463,12 +1582,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
@@ -1524,10 +1643,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.
- removeAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
// Add the new return attributes.
if (RAttrs.hasAttributes())
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 5415726..48ab0eb 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return nullptr;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : nullptr;
- Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : nullptr;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -424,8 +418,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateTruncated(PN->getIncomingValue(i), Ty, IC, CxtI))
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
return false;
return true;
}
@@ -441,6 +435,15 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
+ // Test if the trunc is the user of a select which is part of a
+ // minimum or maximum operation. If so, don't do any more simplification.
+ // Even simplifying demanded bits can break the canonical form of a
+ // min/max.
+ Value *LHS, *RHS;
+ if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
+ if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
// See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
@@ -1035,8 +1038,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateSExtd(PN->getIncomingValue(i), Ty)) return false;
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateSExtd(IncValue, Ty)) return false;
return true;
}
default:
@@ -1064,6 +1067,15 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
+ // If we know that the value being extended is positive, we can use a zext
+ // instead.
+ bool KnownZero, KnownOne;
+ ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
+ if (KnownZero) {
+ Value *ZExt = Builder->CreateZExt(Src, DestTy);
+ return ReplaceInstUsesWith(CI, ZExt);
+ }
+
// Attempt to extend the entire input expression tree to the destination
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
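
The new sext-to-zext fold above relies on sign and zero extension agreeing for non-negative sources. A standalone exhaustive check at 8 bits (illustrative, not part of the diff):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int8_t X = 0;; ++X) {      // every non-negative int8_t value
        int32_t SExt = X;             // sign extend
        uint32_t ZExt = (uint8_t)X;   // zero extend
        assert((uint32_t)SExt == ZExt);
        if (X == INT8_MAX) break;     // stop before wrapping
      }
      return 0;
    }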
@@ -1332,22 +1344,57 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
return commonCastTransforms(CI);
}
+// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
+// This is safe if the intermediate type has enough bits in its mantissa to
+// accurately represent all values of X. For example, this won't work with
+// i64 -> float -> i64.
+Instruction *InstCombiner::FoldItoFPtoI(Instruction &FI) {
+ if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
+ return nullptr;
+ Instruction *OpI = cast<Instruction>(FI.getOperand(0));
+
+ Value *SrcI = OpI->getOperand(0);
+ Type *FITy = FI.getType();
+ Type *OpITy = OpI->getType();
+ Type *SrcTy = SrcI->getType();
+ bool IsInputSigned = isa<SIToFPInst>(OpI);
+ bool IsOutputSigned = isa<FPToSIInst>(FI);
+
+ // We can safely assume the conversion won't overflow the output range,
+ // because (for example) (uint8_t)18293.f is undefined behavior.
+
+ // Since we can assume the conversion won't overflow, our decision as to
+ // whether the input will fit in the float should depend on the minimum
+ // of the input range and output range.
+
+ // This means this is also safe for a signed input and unsigned output, since
+ // a negative input would lead to undefined behavior.
+ int InputSize = (int)SrcTy->getScalarSizeInBits() - IsInputSigned;
+ int OutputSize = (int)FITy->getScalarSizeInBits() - IsOutputSigned;
+ int ActualSize = std::min(InputSize, OutputSize);
+
+ if (ActualSize <= OpITy->getFPMantissaWidth()) {
+ if (FITy->getScalarSizeInBits() > SrcTy->getScalarSizeInBits()) {
+ if (IsInputSigned && IsOutputSigned)
+ return new SExtInst(SrcI, FITy);
+ return new ZExtInst(SrcI, FITy);
+ }
+ if (FITy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits())
+ return new TruncInst(SrcI, FITy);
+ if (SrcTy == FITy)
+ return ReplaceInstUsesWith(FI, SrcI);
+ return new BitCastInst(SrcI, FITy);
+ }
+ return nullptr;
+}
+
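
The mantissa-width test in FoldItoFPtoI can be seen concretely in standalone C++ (illustrative): float's 24-bit mantissa represents every 16-bit integer exactly, so the round trip is lossless and the conversion pair can be folded away, while a 64-bit integer can lose low bits.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = INT16_MIN; X <= INT16_MAX; ++X)
        assert((int16_t)(float)(int16_t)X == (int16_t)X); // lossless round trip
      int64_t Big = (1LL << 60) + 1;
      assert((int64_t)(float)Big != Big); // 24-bit mantissa drops the low bit
      return 0;
    }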
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
if (!OpI)
return commonCastTransforms(FI);
- // fptoui(uitofp(X)) --> X
- // fptoui(sitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1357,17 +1404,8 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
if (!OpI)
return commonCastTransforms(FI);
- // fptosi(sitofp(X)) --> X
- // fptosi(uitofp(X)) --> X
- // This is safe if the intermediate type has enough bits in its mantissa to
- // accurately represent all values of X. For example, do not do this with
- // i64->float->i64. This is also safe for sitofp case, because any negative
- // 'X' value would cause an undefined result for the fptoui.
- if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
- OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getScalarSizeInBits() <=
- OpI->getType()->getFPMantissaWidth())
- return ReplaceInstUsesWith(FI, OpI->getOperand(0));
+ if (Instruction *I = FoldItoFPtoI(FI))
+ return I;
return commonCastTransforms(FI);
}
@@ -1384,18 +1422,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
-
- if (DL) {
- unsigned AS = CI.getAddressSpace();
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- DL->getPointerSizeInBits(AS)) {
- Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
- }
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1424,41 +1459,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
CI.setOperand(0, GEP->getOperand(0));
return &CI;
}
-
- if (!DL)
- return commonCastTransforms(CI);
-
- // If the GEP has a single use, and the base pointer is a bitcast, and the
- // GEP computes a constant offset, see if we can convert these three
- // instructions into fewer. This typically happens with unions and other
- // non-type-safe code.
- unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL->getPointerSizeInBits(AS);
- APInt Offset(OffsetBits, 0);
- BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() &&
- BCI &&
- GEP->accumulateConstantOffset(*DL, Offset)) {
- // Get the base pointer input of the bitcast, and the type it points to.
- Value *OrigBase = BCI->getOperand(0);
- SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(),
- Offset.getSExtValue(),
- NewIndices)) {
- // If we were able to index down into an element, create the GEP
- // and bitcast the result. This eliminates one bitcast, potentially
- // two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
- NGEP->takeName(GEP);
-
- if (isa<BitCastInst>(CI))
- return new BitCastInst(NGEP, CI.getType());
- assert(isa<PtrToIntInst>(CI));
- return new PtrToIntInst(NGEP, CI.getType());
- }
- }
}
return commonCastTransforms(CI);
@@ -1469,16 +1469,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!DL)
- return commonPointerCastTransforms(CI);
-
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
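[Editor's sketch, not part of the patch: the C analogue of the canonical form this hunk produces, with the pointer-width ptrtoint made explicit before the narrowing; the helper name is hypothetical.]

  #include <cstdint>

  uint16_t lowBitsOfPointer(void *P) {
    uintptr_t Full = (uintptr_t)P; // ptrtoint at the width DL reports for this address space
    return (uint16_t)Full;         // the explicit trunc the combine inserts
  }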
@@ -1562,8 +1559,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned Shift,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy, InstCombiner &IC) {
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
"Shift should be a multiple of the element type size");
@@ -1579,7 +1576,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
return true;
unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
- if (IC.getDataLayout()->isBigEndian())
+ if (isBigEndian)
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
@@ -1599,7 +1596,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- Shift, Elements, VecEltTy, IC);
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1614,7 +1611,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
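[Editor's sketch, not part of the patch: the lshr-then-trunc slicing the loop above performs, replayed on a concrete 32-bit constant in little-endian element order.]

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t C = 0xAABBCCDD;
    uint8_t Expected[4] = {0xDD, 0xCC, 0xBB, 0xAA};
    for (unsigned i = 0; i != 4; ++i) {
      uint8_t Piece = (uint8_t)(C >> (8 * i)); // lshr by ShiftI, then trunc
      assert(Piece == Expected[i]);
    }
  }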
@@ -1627,28 +1625,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC) &&
- CollectInsertionElements(I->getOperand(1), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
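[Editor's sketch, not part of the patch: the ZExt/Shl/Or chain this switch walks is the scalar spelling of a build-vector. Packing two i32 halves into an i64 and bitcasting the result to <2 x i32> yields {Lo, Hi} on a little-endian target.]

  #include <cstdint>

  uint64_t pack(uint32_t Lo, uint32_t Hi) {
    return (uint64_t)Lo | ((uint64_t)Hi << 32); // Or(ZExt(Lo), Shl(ZExt(Hi), 32))
  }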
@@ -1671,15 +1669,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType(), IC))
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1699,10 +1695,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+ const DataLayout &DL) {
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1725,7 +1719,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1749,7 +1743,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
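[Editor's sketch, not part of the patch: the lane arithmetic both hunks implement, with the big-endian mirror applied; the function name is illustrative.]

  unsigned extractLane(unsigned ShAmtBits, unsigned DestWidth, unsigned VecBits,
                       bool BigEndian) {
    unsigned Elt = ShAmtBits / DestWidth;  // lane selected by the shift amount
    if (BigEndian)
      Elt = VecBits / DestWidth - 1 - Elt; // lanes number from the other end
    return Elt;
  }
  // e.g. extractLane(32, 32, 64, false) == 1, but 0 when BigEndian is true.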
@@ -1785,26 +1779,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the source and destination are pointers, and this cast is equivalent
// to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
// This can enhance SROA and other transforms that want type-safe pointers.
- Constant *ZeroUInt =
- Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
- SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
+ SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(0U);
++NumZeros;
}
// If we found a path from the src to dest, create the getelementptr now.
if (SrcElTy == DstElTy) {
- SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt);
+ SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0));
return GetElementPtrInst::CreateInBounds(Src, Idxs);
}
}
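[Editor's sketch, not part of the patch: the zero-index GEP the loop discovers corresponds to descending through leading members until the element types match; the structs below are illustrative.]

  struct Inner { int First; int Rest; };
  struct Outer { Inner In; };

  int *firstScalar(Outer *O) {
    return &O->In.First; // gep inbounds (Outer, 0, 0, 0): same address as O itself
  }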
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
return I;
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c07c96d..f53eeef 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -24,7 +24,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
using namespace PatternMatch;
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
- // We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && !DL)
- return nullptr;
-
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (DL)
- Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
- else if (ArrayElementCount <= 32)
- Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (DL &&
- GEPsInBounds &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
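[Editor's sketch, not part of the patch: in C terms, comparing two in-bounds pointers derived from one base reduces to comparing their offsets, which is all this fold does.]

  bool ordered(int *Base, unsigned I, unsigned J) {
    return &Base[I] < &Base[J]; // folds to I < J when both GEPs are inbounds
  }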
@@ -1928,17 +1919,20 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
- DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = nullptr;
- if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
+ if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
+ Value *RHSCIOp = RHSC->getOperand(0);
+ if (RHSCIOp->getType()->getPointerAddressSpace() ==
+ LHSCIOp->getType()->getPointerAddressSpace()) {
+ RHSOp = RHSC->getOperand(0);
+ // If the pointer types don't match, insert a bitcast.
+ if (LHSCIOp->getType() != RHSOp->getType())
+ RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
+ }
+ } else if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1)))
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
- } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
- RHSOp = RHSC->getOperand(0);
- // If the pointer types don't match, insert a bitcast.
- if (LHSCIOp->getType() != RHSOp->getType())
- RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());
- }
if (RHSOp)
return new ICmpInst(ICI.getPredicate(), LHSCIOp, RHSOp);
@@ -2103,7 +2097,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
- CallInst *Call = Builder->CreateCall2(F, TruncA, TruncB, "sadd");
+ CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
@@ -2115,33 +2109,96 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
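[Editor's sketch, not part of the patch: what the emitted llvm.sadd.with.overflow computes on the truncated operands. __builtin_add_overflow is a GCC/Clang extension, used here purely for illustration.]

  #include <cstdint>

  bool saddOverflows(int8_t A, int8_t B) {
    int8_t Sum;
    return __builtin_add_overflow(A, B, &Sum); // {Sum, Overflow} in one call
  }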
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
- InstCombiner &IC) {
- // Don't bother doing this transformation for pointers, don't do it for
- // vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
+bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
+ Value *RHS, Instruction &OrigI,
+ Value *&Result, Constant *&Overflow) {
+ assert((!OrigI.isCommutative() ||
+ !(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
+ "call with a constant RHS if possible!");
+
+ auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
+ Result = OpResult;
+ Overflow = OverflowVal;
+ if (ReuseName)
+ Result->takeName(&OrigI);
+ return true;
+ };
- // If the add is a constant expr, then we don't bother transforming it.
- Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (!OrigAdd) return nullptr;
+ switch (OCF) {
+ case OCF_INVALID:
+ llvm_unreachable("bad overflow check kind!");
- Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
+ case OCF_UNSIGNED_ADD: {
+ OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(),
+ true);
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(OrigAdd);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+ }
+ // FALL THROUGH uadd into sadd
+ case OCF_SIGNED_ADD: {
+ // X + 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ // We can strength reduce this signed add into a regular add if we can prove
+ // that it will never overflow.
+ if (OCF == OCF_SIGNED_ADD)
+ if (WillNotOverflowSignedAdd(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(),
+ true);
+ break;
+ }
- Module *M = I.getParent()->getParent()->getParent();
- Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
- CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
- Value *Add = Builder->CreateExtractValue(Call, 0);
+ case OCF_UNSIGNED_SUB:
+ case OCF_SIGNED_SUB: {
+ // X - 0 -> {X, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(LHS, Builder->getFalse(), false);
- IC.ReplaceInstUsesWith(*OrigAdd, Add);
+ if (OCF == OCF_SIGNED_SUB) {
+ if (WillNotOverflowSignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ } else {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(),
+ true);
+ }
+ break;
+ }
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "uadd.overflow");
+ case OCF_UNSIGNED_MUL: {
+ OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
+ if (OR == OverflowResult::NeverOverflows)
+ return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ if (OR == OverflowResult::AlwaysOverflows)
+ return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
+ } // FALL THROUGH
+ case OCF_SIGNED_MUL:
+ // X * undef -> undef
+ if (isa<UndefValue>(RHS))
+ return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false);
+
+ // X * 0 -> {0, false}
+ if (match(RHS, m_Zero()))
+ return SetResult(RHS, Builder->getFalse(), false);
+
+ // X * 1 -> {X, false}
+ if (match(RHS, m_One()))
+ return SetResult(LHS, Builder->getFalse(), false);
+
+ if (OCF == OCF_SIGNED_MUL)
+ if (WillNotOverflowSignedMul(LHS, RHS, OrigI))
+ return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(),
+ true);
+ break;
+ }
+
+ return false;
}
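[Editor's sketch, not part of the patch: the NeverOverflows case of the unsigned-add branch in miniature. When computeOverflowForUnsignedAdd can see that both operands have a clear top bit, the sum cannot wrap, so Overflow becomes a constant false and the add can carry the nuw flag.]

  #include <cstdint>

  bool uaddCannotWrap(uint32_t A, uint32_t B) {
    return (A >> 31) == 0 && (B >> 31) == 0; // A, B < 2^31  =>  A + B < 2^32
  }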
/// \brief Recognize and process idiom involving test for multiplication
@@ -2311,7 +2368,7 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
MulB = Builder->CreateZExt(B, MulType);
Value *F =
Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
- CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul");
+ CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
IC.Worklist.Add(MulInstr);
// If there are uses of mul result other than the comparison, we know that
@@ -2657,8 +2714,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (DL) // Pointers require DL info to get their size.
- BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2771,8 +2828,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -3091,9 +3148,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && DL &&
- DL->getIntPtrType(RHSC->getType()) ==
- LHSI->getOperand(0)->getType())
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
@@ -3425,7 +3481,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3439,21 +3495,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
- // (a+b) <u a --> llvm.uadd.with.overflow.
- // (a+b) <u b --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
- (Op1 == A || Op1 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
- return R;
-
- // a >u (a+b) --> llvm.uadd.with.overflow.
- // b >u (a+b) --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op1, m_Add(m_Value(A), m_Value(B))) &&
- (Op0 == A || Op0 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
- return R;
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ ReplaceInstUsesWith(*AddI, Result);
+ return ReplaceInstUsesWith(I, Overflow);
+ }
+ }
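[Editor's sketch, not part of the patch: the idiom m_UAddWithOverflow now matches in one place. An unsigned add wrapped exactly when the wrapped sum compares below either operand.]

  #include <cstdint>

  bool uaddOverflowed(uint32_t A, uint32_t B) {
    return A + B < A; // the (a+b) <u a form listed in the removed comments
  }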
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
@@ -3560,6 +3613,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
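[Editor's sketch, not part of the patch: a concrete check of the new fold's algebra. Values differing only in bits the shift discards compare equal after the shift, and the mask test agrees.]

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t A = 0x80000001, B = 1; // differ only in the bit shl discards
    unsigned C = 1;
    assert((A << C) == (B << C));        // original comparison
    assert(((A ^ B) & (~0u >> C)) == 0); // the replacement mask test
  }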
// Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
@@ -3886,6 +3954,19 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
+ // Test if the FCmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (I.hasOneUse())
+ if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 3c3c135..97ea8df 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -1,4 +1,4 @@
-//===- InstCombine.h - Main InstCombine pass definition ---------*- C++ -*-===//
+//===- InstCombineInternal.h - InstCombine pass internals -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,12 +6,17 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides internal interfaces used to implement the InstCombine pass.
+///
+//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINE_H
+#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H
-#include "InstCombineWorklist.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
@@ -21,7 +26,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#define DEBUG_TYPE "instcombine"
@@ -34,20 +39,15 @@ class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-/// SelectPatternFlavor - We can match a variety of different patterns for
-/// select operations.
-enum SelectPatternFlavor {
- SPF_UNKNOWN = 0,
- SPF_SMIN,
- SPF_UMIN,
- SPF_SMAX,
- SPF_UMAX,
- SPF_ABS,
- SPF_NABS
-};
-
-/// getComplexity: Assign a complexity or rank value to LLVM Values...
-/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
+/// \brief Assign a complexity or rank value to LLVM Values.
+///
+/// This routine maps IR values to various complexity ranks:
+/// 0 -> undef
+/// 1 -> Constants
+/// 2 -> Other non-instructions
+/// 3 -> Arguments
+/// 3 -> Unary operations
+/// 4 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
@@ -60,18 +60,82 @@ static inline unsigned getComplexity(Value *V) {
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
-/// AddOne - Add one to a Constant
+/// \brief Add one to a Constant
static inline Constant *AddOne(Constant *C) {
return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
-/// SubOne - Subtract one from a Constant
+/// \brief Subtract one from a Constant
static inline Constant *SubOne(Constant *C) {
return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
-/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
-/// just like the normal insertion helper, but also adds any new instructions
-/// to the instcombine worklist.
+/// \brief Return true if the specified value is free to invert (apply ~ to).
+/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
+/// is true, work under the assumption that the caller intends to remove all
+/// uses of V and only keep uses of ~V.
+///
+static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
+ // ~(~(X)) -> X.
+ if (BinaryOperator::isNot(V))
+ return true;
+
+ // Constants can be considered to be not'ed values.
+ if (isa<ConstantInt>(V))
+ return true;
+
+  // Compares can be inverted if all of their uses are being modified to
+  // use ~V.
+ if (isa<CmpInst>(V))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `A + Constant` then `-1 - V` can be folded into `(-1
+ // - Constant) - A` if we are willing to invert all of the uses.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub)
+ if (isa<Constant>(BO->getOperand(0)) || isa<Constant>(BO->getOperand(1)))
+ return WillInvertAllUses;
+
+ return false;
+}
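[Editor's sketch, not part of the patch: the Add/Sub-with-constant case rests on the two's-complement identity ~V == -1 - V, which lets the not fold entirely into the constant.]

  #include <cassert>

  int main() {
    int A = 12345, Const = 7;
    assert(~(A + Const) == (-1 - Const) - A); // ~(A + C) folds to (-1 - C) - A
  }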
+
+/// \brief Specific patterns of overflow check idioms that we match.
+enum OverflowCheckFlavor {
+ OCF_UNSIGNED_ADD,
+ OCF_SIGNED_ADD,
+ OCF_UNSIGNED_SUB,
+ OCF_SIGNED_SUB,
+ OCF_UNSIGNED_MUL,
+ OCF_SIGNED_MUL,
+
+ OCF_INVALID
+};
+
+/// \brief Returns the OverflowCheckFlavor corresponding to an overflow_with_op
+/// intrinsic.
+static inline OverflowCheckFlavor
+IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
+ switch (ID) {
+ default:
+ return OCF_INVALID;
+ case Intrinsic::uadd_with_overflow:
+ return OCF_UNSIGNED_ADD;
+ case Intrinsic::sadd_with_overflow:
+ return OCF_SIGNED_ADD;
+ case Intrinsic::usub_with_overflow:
+ return OCF_UNSIGNED_SUB;
+ case Intrinsic::ssub_with_overflow:
+ return OCF_SIGNED_SUB;
+ case Intrinsic::umul_with_overflow:
+ return OCF_UNSIGNED_MUL;
+ case Intrinsic::smul_with_overflow:
+ return OCF_SIGNED_MUL;
+ }
+}
+
+/// \brief An IRBuilder inserter that adds new instructions to the instcombine
+/// worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
@@ -92,47 +156,60 @@ public:
}
};
-/// InstCombiner - The -instcombine pass.
+/// \brief The core instruction combiner logic.
+///
+/// This class provides both the logic to recursively visit instructions and
+/// combine them, as well as the pass infrastructure for running this as part
+/// of the LLVM pass pipeline.
class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction *> {
- AssumptionCache *AC;
- const DataLayout *DL;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
- bool MadeIRChange;
- LibCallSimplifier *Simplifier;
- bool MinimizeSize;
-
+ : public InstVisitor<InstCombiner, Instruction *> {
+ // FIXME: These members shouldn't be public.
public:
- /// Worklist - All of the instructions that need to be simplified.
- InstCombineWorklist Worklist;
+ /// \brief A worklist of the instructions that need to be simplified.
+ InstCombineWorklist &Worklist;
- /// Builder - This is an IRBuilder that automatically inserts new
- /// instructions into the worklist when they are created.
+ /// \brief An IRBuilder that automatically inserts new instructions into the
+ /// worklist.
typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy;
BuilderTy *Builder;
- static char ID; // Pass identification, replacement for typeid
- InstCombiner()
- : FunctionPass(ID), DL(nullptr), DT(nullptr), Builder(nullptr) {
- MinimizeSize = false;
- initializeInstCombinerPass(*PassRegistry::getPassRegistry());
- }
+private:
+ // Mode in which we are running the combiner.
+ const bool MinimizeSize;
-public:
- bool runOnFunction(Function &F) override;
+ // Required analyses.
+ // FIXME: These can never be null and should be references.
+ AssumptionCache *AC;
+ TargetLibraryInfo *TLI;
+ DominatorTree *DT;
+ const DataLayout &DL;
+
+ // Optional analyses. When non-null, these can both be used to do better
+ // combining and will be updated to reflect any changes.
+ LoopInfo *LI;
+
+ bool MadeIRChange;
- bool DoOneIteration(Function &F, unsigned ItNum);
+public:
+ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
+ bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
+ : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
+ AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+ /// \brief Run the combiner over the entire worklist until it is empty.
+ ///
+ /// \returns true if the IR is changed.
+ bool run();
AssumptionCache *getAssumptionCache() const { return AC; }
- const DataLayout *getDataLayout() const { return DL; }
-
+ const DataLayout &getDataLayout() const { return DL; }
+
DominatorTree *getDominatorTree() const { return DT; }
+ LoopInfo *getLoopInfo() const { return LI; }
+
TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
// Visitation implementation - Implement instruction combining for different
@@ -222,6 +299,7 @@ public:
Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
+ Instruction *FoldItoFPtoI(Instruction &FI);
Instruction *visitSelectInst(SelectInst &SI);
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
@@ -262,37 +340,51 @@ private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
- Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
+ Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
- /// results in any code being generated and is interesting to optimize out. If
- /// the cast can be eliminated by some other simple transformation, we prefer
- /// to do the simplification first.
+ /// \brief Classify whether a cast is worth optimizing.
+ ///
+ /// Returns true if the cast from "V to Ty" actually results in any code
+ /// being generated and is interesting to optimize out. If the cast can be
+ /// eliminated by some other simple transformation, we prefer to do the
+ /// simplification first.
bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
+ /// \brief Try to optimize a sequence of instructions checking if an operation
+ /// on LHS and RHS overflows.
+ ///
+ /// If a simplification is possible, stores the simplified result of the
+  /// operation in OperationResult and the result of the overflow check in
+  /// OverflowResult, and returns true. If no simplification is possible,
+ /// returns false.
+ bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS,
+ Instruction &CtxI, Value *&OperationResult,
+ Constant *&OverflowResult);
+
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+ Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
public:
- // InsertNewInstBefore - insert an instruction New before instruction Old
- // in the program. Add the new instruction to the worklist.
- //
+ /// \brief Inserts an instruction \p New before instruction \p Old
+ ///
+ /// Also adds the new instruction to the worklist and returns \p New so that
+ /// it is suitable for use as the return from the visitation patterns.
Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
@@ -302,21 +394,23 @@ public:
return New;
}
- // InsertNewInstWith - same as InsertNewInstBefore, but also sets the
- // debug loc.
- //
+ /// \brief Same as InsertNewInstBefore, but also sets the debug loc.
Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) {
New->setDebugLoc(Old.getDebugLoc());
return InsertNewInstBefore(New, Old);
}
- // ReplaceInstUsesWith - This method is to be used when an instruction is
- // found to be dead, replacable with another preexisting expression. Here
- // we add all uses of I to the worklist, replace all uses of I with the new
- // value, then return I, so that the inst combiner will know that I was
- // modified.
- //
+ /// \brief A combiner-aware RAUW-like routine.
+ ///
+ /// This method is to be used when an instruction is found to be dead,
+  /// replaceable with another preexisting expression. Here we add all uses of
+ /// I to the worklist, replace all uses of I with the new value, then return
+ /// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
@@ -325,30 +419,27 @@ public:
V = UndefValue::get(I.getType());
DEBUG(dbgs() << "IC: Replacing " << I << "\n"
- " with " << *V << '\n');
+ << " with " << *V << '\n');
I.replaceAllUsesWith(V);
return &I;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
- /// \p Result and a constant \p Overflow value. If \p ReUseName is true the
- /// \p Result's name is taken from \p II.
+ /// \p Result and a constant \p Overflow value.
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result,
- bool Overflow, bool ReUseName = true) {
- if (ReUseName)
- Result->takeName(II);
- Constant *V[] = { UndefValue::get(Result->getType()),
- Overflow ? Builder->getTrue() : Builder->getFalse() };
+ Constant *Overflow) {
+ Constant *V[] = {UndefValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
}
-
- // EraseInstFromFunction - When dealing with an instruction that has side
- // effects or produces a void value, we can't rely on DCE to delete the
- // instruction. Instead, visit methods should return the value returned by
- // this function.
+
+ /// \brief Combiner aware instruction erasure.
+ ///
+ /// When dealing with an instruction that has side effects or produces a void
+ /// value, we can't rely on DCE to delete the instruction. Instead, visit
+ /// methods should return the value returned by this function.
Instruction *EraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');
@@ -367,13 +458,12 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ unsigned Depth, Instruction *CxtI) const {
return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
DT);
}
- bool MaskedValueIsZero(Value *V, const APInt &Mask,
- unsigned Depth = 0,
+ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
Instruction *CxtI = nullptr) const {
return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AC, CxtI, DT);
}
@@ -396,22 +486,24 @@ public:
}
private:
- /// SimplifyAssociativeOrCommutative - This performs a few simplifications for
- /// operators which are associative or commutative.
+ /// \brief Performs a few simplifications for operators which are associative
+ /// or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
- /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
- /// which some other binary operation distributes over either by factorizing
- /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
- /// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is
- /// a win). Returns the simplified value, or null if it didn't simplify.
+ /// \brief Tries to simplify binary operations which some other binary
+ /// operation distributes over.
+ ///
+ /// It does this by either by factorizing out common terms (eg "(A*B)+(A*C)"
+ /// -> "A*(B+C)") or expanding out if this results in simplifications (eg: "A
+ /// & (B | C) -> (A&B) | (A&C)" if this is a win). Returns the simplified
+ /// value, or null if it didn't simplify.
Value *SimplifyUsingDistributiveLaws(BinaryOperator &I);
- /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
- /// based on the demanded bits.
+ /// \brief Attempts to replace V with a simpler value based on the demanded
+ /// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
@@ -420,9 +512,8 @@ private:
APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne);
- /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
- /// SimplifyDemandedBits knows about. See if the instruction has any
- /// properties that allow us to simplify its operands.
+ /// \brief Tries to simplify operands to an integer instruction based on its
+ /// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
@@ -438,9 +529,8 @@ private:
//
Instruction *FoldOpIntoPhi(Instruction &I);
- // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
- // operator and they all are only used by the PHI, PHI together their
- // inputs, and do the operation once, to the result of the PHI.
+ /// \brief Try to rotate an operation below a PHI node, using PHI nodes for
+ /// its operands.
Instruction *FoldPHIArgOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN);
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
@@ -461,8 +551,9 @@ private:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
- /// Descale - Return a value X such that Val = X * Scale, or null if none. If
- /// the multiplication is known not to overflow then NoSignedWrap is set.
+ /// \brief Returns a value X such that Val = X * Scale, or null if none.
+ ///
+ /// If the multiplication is known not to overflow then NoSignedWrap is set.
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 6230c00..e7a4533 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -83,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (CallSite CS = I) {
+ if (auto CS = CallSite(I)) {
// If this is the function being called then we treat it like a load and
// ignore it.
if (CS.isCallee(&U))
@@ -163,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- if (DL) {
- Type *IntPtrTy = DL->getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = Builder->CreateIntCast(AI.getArraySize(),
- IntPtrTy, false);
- AI.setOperand(0, V);
- return &AI;
- }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
}
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = DL
- ? DL->getIntPtrType(AI.getType())
- : Type::getInt64Ty(AI.getContext());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = { NullIdx, NullIdx };
- Instruction *GEP =
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible...also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that I is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- InsertNewInstBefore(GEP, *It);
+ IC.InsertNewInstBefore(GEP, *It);
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, GEP);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.ReplaceInstUsesWith(AI, GEP);
}
- if (DL && AI.getAllocatedType()->isSized()) {
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
// Move all alloca's of zero byte objects to the entry block and merge them
// together. Note that we only do this for alloca's, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
@@ -236,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -245,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -269,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -300,7 +314,8 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy) {
+static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
+ const Twine &Suffix = "") {
Value *Ptr = LI.getPointerOperand();
unsigned AS = LI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -308,7 +323,8 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
- LI.getAlignment(), LI.getName());
+ LI.getAlignment(), LI.getName() + Suffix);
+ MDBuilder MDB(NewLoad->getContext());
for (const auto &MDPair : MD) {
unsigned ID = MDPair.first;
MDNode *N = MDPair.second;
@@ -335,21 +351,81 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_nonnull:
- // FIXME: We should translate this into range metadata for integer types
- // and vice versa.
- if (NewTy->isPointerTy())
+ // This only directly applies if the new type is also a pointer.
+ if (NewTy->isPointerTy()) {
NewLoad->setMetadata(ID, N);
+ break;
+ }
+ // If it's integral now, translate it to !range metadata.
+ if (NewTy->isIntegerTy()) {
+ auto *ITy = cast<IntegerType>(NewTy);
+ auto *NullInt = ConstantExpr::getPtrToInt(
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+ auto *NonNullInt =
+ ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+ NewLoad->setMetadata(LLVMContext::MD_range,
+ MDB.createRange(NonNullInt, NullInt));
+ }
break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard.
+ // conversions make it hard. If the new type is a pointer, we could
+ // translate it to !nonnull metadata.
break;
}
}
return NewLoad;
}
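[Editor's sketch, not part of the patch: the !range pair built above is the wrapped interval [NullInt+1, NullInt), i.e. [1, 0) when null maps to 0, which admits every integer except 0: the integer image of the old !nonnull fact. A membership test for such a wrapped range:]

  #include <cstdint>

  bool inNonNullRange(uint64_t V) {
    const uint64_t Lo = 1, Hi = 0;
    return V >= Lo || V < Hi; // Lo > Hi means the range wraps around the top
  }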
+/// \brief Combine a store to a new type.
+///
+/// Returns the newly created store instruction.
+static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ Value *Ptr = SI.getPointerOperand();
+ unsigned AS = SI.getPointerAddressSpace();
+ SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
+ SI.getAllMetadata(MD);
+
+ StoreInst *NewStore = IC.Builder->CreateAlignedStore(
+ V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
+ SI.getAlignment());
+ for (const auto &MDPair : MD) {
+ unsigned ID = MDPair.first;
+ MDNode *N = MDPair.second;
+ // Note, essentially every kind of metadata should be preserved here! This
+ // routine is supposed to clone a store instruction changing *only its
+ // type*. The only metadata it makes sense to drop is metadata which is
+ // invalidated when the pointer type changes. This should essentially
+ // never be the case in LLVM, but we explicitly switch over only known
+ // metadata to be conservatively correct. If you are adding metadata to
+ // LLVM which pertains to stores, you almost certainly want to add it
+ // here.
+ switch (ID) {
+ case LLVMContext::MD_dbg:
+ case LLVMContext::MD_tbaa:
+ case LLVMContext::MD_prof:
+ case LLVMContext::MD_fpmath:
+ case LLVMContext::MD_tbaa_struct:
+ case LLVMContext::MD_alias_scope:
+ case LLVMContext::MD_noalias:
+ case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_mem_parallel_loop_access:
+ // All of these directly apply.
+ NewStore->setMetadata(ID, N);
+ break;
+
+ case LLVMContext::MD_invariant_load:
+ case LLVMContext::MD_nonnull:
+ case LLVMContext::MD_range:
+ // These don't apply for stores.
+ break;
+ }
+ }
+
+ return NewStore;
+}
+
/// \brief Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
@@ -376,14 +452,48 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.use_empty())
return nullptr;
+ Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
+
+ // Try to canonicalize loads which are only ever stored to operate over
+  // integers instead of any other type. We only do this when the loaded type
+  // is sized, its size exactly matches its store size, and the store size is
+  // a legal integer type.
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
+ if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ return SI && SI->getPointerOperand() != &LI;
+ })) {
+ LoadInst *NewLoad = combineLoadToNewType(
+ IC, LI,
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+ // Replace all the stores with stores of the newly loaded value.
+ for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
+ auto *SI = cast<StoreInst>(*UI++);
+ IC.Builder->SetInsertPoint(SI);
+ combineStoreToNewValue(IC, *SI, NewLoad);
+ IC.EraseInstFromFunction(*SI);
+ }
+ assert(LI.use_empty() && "Failed to remove all users of the load!");
+ // Return the old load so the combiner can delete it safely.
+ return &LI;
+ }
+ }
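[Editor's sketch, not part of the patch: a load whose only users are stores never needs floating-point or pointer semantics, so it can travel as its integer image; memcpy is the C++ spelling of the same bit-preserving move.]

  #include <cstdint>
  #include <cstring>

  void copyFloat(const float *Src, float *Dst) {
    uint32_t Bits;
    std::memcpy(&Bits, Src, sizeof Bits); // load i32 in place of load float
    std::memcpy(Dst, &Bits, sizeof Bits); // store i32 in place of store float
  }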
// Fold away bit casts of the loaded value by loading the desired type.
+ // We can do this for BitCastInsts as well as casts from and to pointer types,
+  // as long as those are noops (i.e., the source or dest type has the same
+ // bitwidth as the target's pointers).
if (LI.hasOneUse())
- if (auto *BC = dyn_cast<BitCastInst>(LI.user_back())) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, BC->getDestTy());
- BC->replaceAllUsesWith(NewLoad);
- IC.EraseInstFromFunction(*BC);
- return &LI;
+ if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
+ if (CI->isNoopCast(DL)) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+ CI->replaceAllUsesWith(NewLoad);
+ IC.EraseInstFromFunction(*CI);
+ return &LI;
+ }
}
// FIXME: We should also canonicalize loads of vectors when their elements are
@@ -391,6 +501,218 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
}
+static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
+  // FIXME: We could probably, with some care, handle both volatile and atomic
+  // loads here, but it isn't clear that this is important.
+ if (!LI.isSimple())
+ return nullptr;
+
+ Type *T = LI.getType();
+ if (!T->isAggregateType())
+ return nullptr;
+
+  assert(LI.getAlignment() && "Alignment must be set at this point");
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+    // If the struct has only one element, we unpack.
+ if (ST->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+    // If the array has only one element, we unpack.
+ if (AT->getNumElements() == 1) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, AT->getElementType(),
+ ".unpack");
+ return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
+ UndefValue::get(T), NewLoad, 0, LI.getName()));
+ }
+ }
+
+ return nullptr;
+}
+
+// If we can determine that all possible objects pointed to by the provided
+// pointer value are not only dereferenceable but also definitively less than
+// or equal to the provided maximum size, then return true (constant globals
+// and allocas are the cases this can prove). Otherwise, return false.
+//
+// FIXME: This should probably live in ValueTracking (or similar).
+static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
+ const DataLayout &DL) {
+ SmallPtrSet<Value *, 4> Visited;
+ SmallVector<Value *, 4> Worklist(1, V);
+
+ do {
+ Value *P = Worklist.pop_back_val();
+ P = P->stripPointerCasts();
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
+ Worklist.push_back(SI->getTrueValue());
+ Worklist.push_back(SI->getFalseValue());
+ continue;
+ }
+
+ if (PHINode *PN = dyn_cast<PHINode>(P)) {
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
+ continue;
+ }
+
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
+ if (GA->mayBeOverridden())
+ return false;
+ Worklist.push_back(GA->getAliasee());
+ continue;
+ }
+
+ // If we know how big this object is, and it is less than MaxSize, continue
+ // searching. Otherwise, return false.
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
+ if (!AI->getAllocatedType()->isSized())
+ return false;
+
+ ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!CS)
+ return false;
+
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
+ // Make sure that, even if the multiplication below would wrap as a
+ // uint64_t, we still do the right thing.
+ if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
+ return false;
+ continue;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
+ if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
+ return false;
+
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
+ if (InitSize > MaxSize)
+ return false;
+ continue;
+ }
+
+ return false;
+ } while (!Worklist.empty());
+
+ return true;
+}
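To illustrate the walk above on hypothetical IR, with MaxSize == 16:

  %buf = alloca [4 x i32]            ; sized, 4 * 4 == 16 bytes: keep searching
  @ext = external global [2 x i32]   ; no definitive initializer: return false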
+
+// If we're indexing into an object of a known size, and the outer index is
+// not a constant, but any value other than zero would lead to undefined
+// behavior, replace it with zero.
+//
+// For example, if we have:
+// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
+// ...
+// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
+// ... = load i32* %arrayidx, align 4
+// Then we know that we can replace %x in the GEP with i64 0.
+//
+// FIXME: We could fold any GEP index to zero that would cause UB if it were
+// not zero. Currently, we only handle the first such index. We could also
+// search through non-zero constant indices if we kept track of the offsets
+// those indices implied.
+static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
+ Instruction *MemI, unsigned &Idx) {
+ if (GEPI->getNumOperands() < 2)
+ return false;
+
+ // Find the first non-zero index of a GEP. If all indices are zero, return
+ // one past the last index.
+ auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
+ unsigned I = 1;
+ for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
+ Value *V = GEPI->getOperand(I);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+ if (CI->isZero())
+ continue;
+
+ break;
+ }
+
+ return I;
+ };
+
+ // Skip through initial 'zero' indices, and find the corresponding pointer
+ // type. See if the next index is not a constant.
+ Idx = FirstNZIdx(GEPI);
+ if (Idx == GEPI->getNumOperands())
+ return false;
+ if (isa<Constant>(GEPI->getOperand(Idx)))
+ return false;
+
+ SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
+ Type *AllocTy = GetElementPtrInst::getIndexedType(
+ cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType())
+ ->getElementType(),
+ Ops);
+ if (!AllocTy || !AllocTy->isSized())
+ return false;
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
+
+ // If there are more indices after the one we might replace with a zero, make
+ // sure they're all non-negative. If any of them are negative, the overall
+ // address being computed might be before the base address determined by the
+ // first non-zero index.
+ auto IsAllNonNegative = [&]() {
+ for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
+ bool KnownNonNegative, KnownNegative;
+ IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative,
+ KnownNegative, 0, MemI);
+ if (KnownNonNegative)
+ continue;
+ return false;
+ }
+
+ return true;
+ };
+
+ // FIXME: If the GEP is not inbounds, and there are extra indices after the
+ // one we'll replace, those could cause the address computation to wrap
+ // (rendering the IsAllNonNegative() check below insufficient). We can do
+// better, ignoring zero indices (and other indices we can prove small
+ // enough not to wrap).
+ if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
+ return false;
+
+ // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
+ // also known to be dereferenceable.
+ return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
+ IsAllNonNegative();
+}
+
+// If we're indexing into an object with a variable index for the memory
+// access, but the object has only one element, we can assume that the index
+// will always be zero. If we replace the GEP, return it.
+template <typename T>
+static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
+ T &MemI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
+ unsigned Idx;
+ if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
+ Instruction *NewGEPI = GEPI->clone();
+ NewGEPI->setOperand(Idx,
+ ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
+ NewGEPI->insertBefore(GEPI);
+ MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ return NewGEPI;
+ }
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
@@ -399,23 +721,30 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return Res;
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
- unsigned LoadAlign = LI.getAlignment();
- unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- DL->getABITypeAlignment(LI.getType());
-
- if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
- else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
+ Worklist.Add(NewGEPI);
+ return &LI;
}
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
if (!LI.isSimple()) return nullptr;
+ if (Instruction *Res = unpackLoadToAggregate(*this, LI))
+ return Res;
+
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
@@ -466,8 +795,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -521,50 +850,12 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
if (!SI.isSimple())
return false;
- Value *Ptr = SI.getPointerOperand();
Value *V = SI.getValueOperand();
- unsigned AS = SI.getPointerAddressSpace();
- SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
- SI.getAllMetadata(MD);
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);
- StoreInst *NewStore = IC.Builder->CreateAlignedStore(
- V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
- SI.getAlignment());
- for (const auto &MDPair : MD) {
- unsigned ID = MDPair.first;
- MDNode *N = MDPair.second;
- // Note, essentially every kind of metadata should be preserved here! This
- // routine is supposed to clone a store instruction changing *only its
- // type*. The only metadata it makes sense to drop is metadata which is
- // invalidated when the pointer type changes. This should essentially
- // never be the case in LLVM, but we explicitly switch over only known
- // metadata to be conservatively correct. If you are adding metadata to
- // LLVM which pertains to stores, you almost certainly want to add it
- // here.
- switch (ID) {
- case LLVMContext::MD_dbg:
- case LLVMContext::MD_tbaa:
- case LLVMContext::MD_prof:
- case LLVMContext::MD_fpmath:
- case LLVMContext::MD_tbaa_struct:
- case LLVMContext::MD_alias_scope:
- case LLVMContext::MD_noalias:
- case LLVMContext::MD_nontemporal:
- case LLVMContext::MD_mem_parallel_loop_access:
- // All of these directly apply.
- NewStore->setMetadata(ID, N);
- break;
-
- case LLVMContext::MD_invariant_load:
- case LLVMContext::MD_nonnull:
- case LLVMContext::MD_range:
- // These don't apply for stores.
- break;
- }
- }
+ combineStoreToNewValue(IC, SI, V);
return true;
}
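A small sketch of this fold (hypothetical values; combineStoreToNewValue
emits the pointer cast):

  ; before
  %f = bitcast i32 %v to float
  store float %f, float* %p
  ; after
  %pi = bitcast float* %p to i32*
  store i32 %v, i32* %pi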
@@ -573,6 +864,39 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably, with some care, handle both volatile and atomic
+ // stores here, but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (auto *ST = dyn_cast<StructType>(T)) {
+ // If the struct has only one element, we unpack it.
+ if (ST->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ if (auto *AT = dyn_cast<ArrayType>(T)) {
+ // If the array has only one element, we unpack it.
+ if (AT->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ return false;
+}
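The store-side counterpart of the load unpacking, sketched for the
single-element struct case (names hypothetical):

  ; before
  store { i32 } %agg, { i32 }* %p
  ; after
  %v  = extractvalue { i32 } %agg, 0
  %pi = bitcast { i32 }* %p to i32*
  store i32 %v, i32* %pi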
+
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -611,17 +935,25 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- DL->getABITypeAlignment(Val->getType());
-
- if (KnownAlign > EffectiveStoreAlign)
- SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
- SI.setAlignment(EffectiveStoreAlign);
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return EraseInstFromFunction(SI);
+
+ // Replace GEP indices if possible.
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
+ Worklist.Add(NewGEPI);
+ return &SI;
}
// Don't hack volatile/atomic stores.
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b2ff96f..a554e9f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -26,7 +26,7 @@ using namespace PatternMatch;
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
- IC.getAssumptionCache(), CxtI,
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
- ComputeNumSignBits(RHS, 0, CxtI);
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
- ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
if (LHSNonNegative || RHSNonNegative)
return true;
}
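A worked instance of the sign-bit argument (hypothetical IR): operands that
are sign extensions from i8 each carry 9 sign bits in i16, and 9 + 9 exceeds
the 16-bit result width, so the product cannot overflow:

  %a = sext i8 %x to i16
  %b = sext i8 %y to i16
  %m = mul i16 %a, %b    ; |%a|, |%b| <= 128, so |%m| <= 2^14: nsw is safe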
@@ -217,12 +217,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
NewCst = getLogBase2Vector(CV);
if (NewCst) {
+ unsigned Width = NewCst->getType()->getPrimitiveSizeInBits();
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
Shl->setHasNoUnsignedWrap();
- if (I.hasNoSignedWrap() && NewCst->isNotMinSignedValue())
- Shl->setHasNoSignedWrap();
+ if (I.hasNoSignedWrap()) {
+ uint64_t V;
+ if (match(NewCst, m_ConstantInt(V)) && V != Width - 1)
+ Shl->setHasNoSignedWrap();
+ }
return Shl;
}
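Why the new Width - 1 guard matters, sketched for i8 (hypothetical values):

  %r = mul nsw i8 %x, -128   ; -128 == 0x80 == 1 << 7
  ; folds to 'shl i8 %x, 7' with nsw dropped: 'mul nsw i8 1, -128' yields
  ; -128 without signed wrap, but 'shl nsw i8 1, 7' would overflow, so the
  ; flag may only be kept when the shift amount is not Width - 1.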
@@ -375,7 +379,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -422,7 +426,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
return false;
}
@@ -437,7 +441,7 @@ static bool isNormalFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isNormal())
return false;
}
@@ -780,7 +784,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1155,7 +1159,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1206,7 +1210,8 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFDivInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1337,7 +1342,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1384,7 +1389,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
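For example, when %y is known to be a power of two (or zero), the urem
becomes a mask (hypothetical values):

  ; before
  %r = urem i32 %x, %y
  ; after
  %m = add i32 %y, -1
  %r = and i32 %x, %m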
@@ -1481,7 +1486,8 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyFRemInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
+ DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 53831c8..6a6693c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -375,8 +375,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// and mark all the input loads as non-volatile. If we don't do this, we will
// insert a new volatile load and the old ones will not be deletable.
if (isVolatile)
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
+ for (Value *IncValue : PN.incoming_values())
+ cast<LoadInst>(IncValue)->setVolatile(false);
LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
NewLI->setDebugLoc(FirstLI->getDebugLoc());
@@ -539,8 +539,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
// Scan the operands to see if they are either phi nodes or are equal to
// the value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Op = PN->getIncomingValue(i);
+ for (Value *Op : PN->incoming_values()) {
if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
return false;
@@ -891,8 +890,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && DL &&
- !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index bf3c33e..f51442a 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -11,88 +11,55 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
-/// returning the kind and providing the out parameter results if we
-/// successfully match.
static SelectPatternFlavor
-MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
- SelectInst *SI = dyn_cast<SelectInst>(V);
- if (!SI) return SPF_UNKNOWN;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (!ICI) return SPF_UNKNOWN;
-
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
-
- LHS = CmpLHS;
- RHS = CmpRHS;
-
- // (icmp X, Y) ? X : Y
- if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMAX;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMAX;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMIN;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMIN;
- }
- }
-
- // (icmp X, Y) ? Y : X
- if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
- switch (Pred) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
- }
+getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return SPF_SMAX;
+ case SPF_UMIN:
+ return SPF_UMAX;
+ case SPF_SMAX:
+ return SPF_SMIN;
+ case SPF_UMAX:
+ return SPF_UMIN;
}
+}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
- if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
- (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
-
- // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
- // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
- return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
- }
-
- // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
- // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
- return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
- }
- }
+static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) {
+ switch (SPF) {
+ default:
+ llvm_unreachable("unhandled!");
+
+ case SPF_SMIN:
+ return ICmpInst::ICMP_SLT;
+ case SPF_UMIN:
+ return ICmpInst::ICMP_ULT;
+ case SPF_SMAX:
+ return ICmpInst::ICMP_SGT;
+ case SPF_UMAX:
+ return ICmpInst::ICMP_UGT;
}
-
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return SPF_UNKNOWN;
}
+static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
+ SelectPatternFlavor SPF, Value *A,
+ Value *B) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B);
+}
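For instance, SPF_SMAX materializes as the canonical icmp/select pair
(hypothetical values):

  %c = icmp sgt i32 %a, %b
  %m = select i1 %c, i32 %a, i32 %b   ; smax(%a, %b)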
/// GetSelectFoldableOperands - We want to turn code that looks like this:
/// %C = or %A, %B
@@ -309,72 +276,6 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}
-/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
-/// replaced with RepOp.
-static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- DominatorTree *DT, AssumptionCache *AC) {
- // Trivial replacement.
- if (V == Op)
- return RepOp;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I)
- return nullptr;
-
- // If this is a binary operator, try to simplify it with the replaced op.
- if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
- if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
- if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
- }
-
- // Same for CmpInsts.
- if (CmpInst *C = dyn_cast<CmpInst>(I)) {
- if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
- TLI, DT, AC);
- if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
- TLI, DT, AC);
- }
-
- // TODO: We could hand off more cases to instsimplify here.
-
- // If all operands are constant after substituting Op for RepOp then we can
- // constant fold the instruction.
- if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
- // Build a list of all constant operands.
- SmallVector<Constant*, 8> ConstOps;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (I->getOperand(i) == Op)
- ConstOps.push_back(CRepOp);
- else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
- ConstOps.push_back(COp);
- else
- break;
- }
-
- // All operands were constants, fold it.
- if (ConstOps.size() == I->getNumOperands()) {
- if (CmpInst *C = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], TD, TLI);
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
-
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD, TLI);
- }
- }
-
- return nullptr;
-}
-
/// foldSelectICmpAndOr - We want to turn:
/// (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
/// into:
@@ -437,6 +338,62 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
return Builder->CreateOr(V, Y);
}
+/// Attempt to fold a cttz/ctlz followed by an icmp plus select into a single
+/// call to cttz/ctlz with the 'is_zero_undef' flag cleared.
+///
+/// For example, we can fold the following code sequence:
+/// \code
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+/// %1 = icmp ne i32 %x, 0
+/// %2 = select i1 %1, i32 %0, i32 32
+/// \endcode
+///
+/// into:
+/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
+static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+
+ // Check if the condition value compares a value for equality against zero.
+ if (!ICI->isEquality() || !match(CmpRHS, m_Zero()))
+ return nullptr;
+
+ Value *Count = FalseVal;
+ Value *ValueOnZero = TrueVal;
+ if (Pred == ICmpInst::ICMP_NE)
+ std::swap(Count, ValueOnZero);
+
+ // Skip zero extend/truncate.
+ Value *V = nullptr;
+ if (match(Count, m_ZExt(m_Value(V))) ||
+ match(Count, m_Trunc(m_Value(V))))
+ Count = V;
+
+ // Check if the value propagated on zero is a constant number equal to the
+ // size in bits of 'Count'.
+ unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
+ if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
+ return nullptr;
+
+ // Check that 'Count' is a call to the cttz/ctlz intrinsic. Also check that
+ // the input to the cttz/ctlz is used as the LHS of the compare instruction.
+ if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
+ match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
+ IntrinsicInst *II = cast<IntrinsicInst>(Count);
+ IRBuilder<> Builder(II);
+ // Explicitly clear the 'is_zero_undef' flag.
+ IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
+ Type *Ty = NewI->getArgOperand(1)->getType();
+ NewI->setArgOperand(1, Constant::getNullValue(Ty));
+ Builder.Insert(NewI);
+ return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ }
+
+ return nullptr;
+}
+
/// visitSelectInstWithICmp - Visit a SelectInst that has an
/// ICmpInst as its first operand.
///
@@ -454,14 +411,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// here, so make sure the select is the only user.
if (ICI->hasOneUse())
if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
- // X < MIN ? T : F --> F
- if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT)
- && CI->isMinValue(Pred == ICmpInst::ICMP_SLT))
- return ReplaceInstUsesWith(SI, FalseVal);
- // X > MAX ? T : F --> F
- else if ((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT)
- && CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))
- return ReplaceInstUsesWith(SI, FalseVal);
switch (Pred) {
default: break;
case ICmpInst::ICMP_ULT:
@@ -575,33 +524,6 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
- // If we have an equality comparison then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
- // simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
- TrueVal)
- return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
- FalseVal)
- return ReplaceInstUsesWith(SI, FalseVal);
- } else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
- FalseVal)
- return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
- TrueVal)
- return ReplaceInstUsesWith(SI, TrueVal);
- }
-
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
@@ -616,7 +538,8 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
- if (unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits()) {
+ {
+ unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;
@@ -665,6 +588,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
return ReplaceInstUsesWith(SI, V);
+ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
+ return ReplaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -770,6 +696,52 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
return ReplaceInstUsesWith(Outer, NewSI);
}
+
+ auto IsFreeOrProfitableToInvert =
+ [&](Value *V, Value *&NotV, bool &ElidesXor) {
+ if (match(V, m_Not(m_Value(NotV)))) {
+ // If V has at most 2 uses then we can get rid of the xor operation
+ // entirely.
+ ElidesXor |= !V->hasNUsesOrMore(3);
+ return true;
+ }
+
+ if (IsFreeToInvert(V, !V->hasNUsesOrMore(3))) {
+ NotV = nullptr;
+ return true;
+ }
+
+ return false;
+ };
+
+ Value *NotA, *NotB, *NotC;
+ bool ElidesXor = false;
+
+ // MIN(MIN(~A, ~B), ~C) == ~MAX(MAX(A, B), C)
+ // MIN(MAX(~A, ~B), ~C) == ~MAX(MIN(A, B), C)
+ // MAX(MIN(~A, ~B), ~C) == ~MIN(MAX(A, B), C)
+ // MAX(MAX(~A, ~B), ~C) == ~MIN(MIN(A, B), C)
+ //
+ // This transform is performance neutral if we can elide at least one xor from
+ // the set of three operands, since we'll be tacking on an xor at the very
+ // end.
+ if (IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
+ IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
+ IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
+ if (!NotA)
+ NotA = Builder->CreateNot(A);
+ if (!NotB)
+ NotB = Builder->CreateNot(B);
+ if (!NotC)
+ NotC = Builder->CreateNot(C);
+
+ Value *NewInner = generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB);
+ Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern(
+ Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC));
+ return ReplaceInstUsesWith(Outer, NewOuter);
+ }
+
return nullptr;
}
@@ -868,7 +840,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (C->getZExtValue() == false) {
+ if (!C->getZExtValue()) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
@@ -1082,26 +1054,67 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into one of our operands.
- if (SI.getType()->isIntegerTy()) {
+ if (SI.getType()->isIntOrIntVectorTy()) {
if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
- // MAX(MAX(a, b), a) -> MAX(a, b)
- // MIN(MIN(a, b), a) -> MIN(a, b)
- // MAX(MIN(a, b), a) -> a
- // MIN(MAX(a, b), a) -> a
Value *LHS, *RHS, *LHS2, *RHS2;
- if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
+ Instruction::CastOps CastOp;
+ SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp);
+
+ if (SPF) {
+ // Canonicalize so that type casts are outside select patterns.
+ if (LHS->getType()->getPrimitiveSizeInBits() !=
+ SI.getType()->getPrimitiveSizeInBits()) {
+ CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF);
+ Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS);
+ Value *NewSI = Builder->CreateCast(CastOp,
+ Builder->CreateSelect(Cmp, LHS, RHS),
+ SI.getType());
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+
+ // MAX(MAX(a, b), a) -> MAX(a, b)
+ // MIN(MIN(a, b), a) -> MIN(a, b)
+ // MAX(MIN(a, b), a) -> a
+ // MIN(MAX(a, b), a) -> a
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
- if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
+ if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2))
if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
}
+ // MAX(~a, ~b) -> ~MIN(a, b)
+ if (SPF == SPF_SMAX || SPF == SPF_UMAX) {
+ if (IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
+ IsFreeToInvert(RHS, RHS->hasNUses(2))) {
+
+ // This transform adds an xor operation, and that extra cost needs to be
+ // justified. We look for simplifications that will result from
+ // applying this rule:
+
+ bool Profitable =
+ (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) ||
+ (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) ||
+ (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
+
+ if (Profitable) {
+ Value *NewLHS = Builder->CreateNot(LHS);
+ Value *NewRHS = Builder->CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX
+ ? Builder->CreateICmpSLT(NewLHS, NewRHS)
+ : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewSI =
+ Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ return ReplaceInstUsesWith(SI, NewSI);
+ }
+ }
+ }
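A sketch of the inversion when both operands are existing 'not' values, so
the new xor is paid for by the two that become dead (names hypothetical):

  ; before: smax(~a, ~b)
  %na = xor i32 %a, -1
  %nb = xor i32 %b, -1
  %c  = icmp sgt i32 %na, %nb
  %m  = select i1 %c, i32 %na, i32 %nb
  ; after: ~smin(a, b)
  %c2 = icmp slt i32 %a, %b
  %m2 = select i1 %c2, i32 %a, i32 %b
  %r  = xor i32 %m2, -1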
+
// TODO.
// ABS(-X) -> ABS(X)
}
@@ -1115,19 +1128,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
- if (TrueSI->getCondition() == CondVal) {
- if (SI.getTrueValue() == TrueSI->getTrueValue())
- return nullptr;
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+ // We choose this as the normal form to enable folding on the And and to
+ // shorten paths for the values (this helps GetUnderlyingObjects(), for
+ // example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
- if (FalseSI->getCondition() == CondVal) {
- if (SI.getFalseValue() == FalseSI->getFalseValue())
- return nullptr;
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
}
}
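The second of the two new chain folds, sketched with hypothetical values:

  ; select(C0, a, select(C1, a, b)), inner select has one use
  %inner = select i1 %c1, i32 %a, i32 %b
  %outer = select i1 %c0, i32 %a, i32 %inner
  ; becomes select(C0|C1, a, b)
  %cond  = or i1 %c0, %c1
  %outer = select i1 %cond, i32 %a, i32 %b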
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0a16e25..d04ed58 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -175,8 +175,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// get into trouble with cyclic PHIs here because we only consider
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,
+ for (Value *IncValue : PN->incoming_values())
+ if (!CanEvaluateShifted(IncValue, NumBits, isLeftShift,
IC, PN))
return false;
return true;
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with DL info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
- IC.getTargetLibraryInfo());
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
return V;
}
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
// Bitwise operators can all be evaluated shifted.
- I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select:
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
- I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
- NumBits, isLeftShift, IC));
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
return PN;
}
}
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
- return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+ return ReplaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index ad6983a..80628b2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
-#include "llvm/IR/DataLayout.h"
+#include "InstCombineInternal.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -44,19 +44,6 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
Demanded &= OpC->getValue();
I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
- // If either 'nsw' or 'nuw' is set and the constant is negative,
- // removing *any* bits from the constant could make overflow occur.
- // Remove 'nsw' and 'nuw' from the instruction in this case.
- if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
- assert(OBO->getOpcode() == Instruction::Add);
- if (OBO->hasNoSignedWrap() || OBO->hasNoUnsignedWrap()) {
- if (OpC->getValue().isNegative()) {
- cast<BinaryOperator>(OBO)->setHasNoSignedWrap(false);
- cast<BinaryOperator>(OBO)->setHasNoUnsignedWrap(false);
- }
- }
- }
-
return true;
}
@@ -70,8 +57,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0, &Inst);
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -84,9 +71,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth,
- dyn_cast<Instruction>(U.getUser()));
+ auto *UserI = dyn_cast<Instruction>(U.getUser());
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
+ KnownOne, Depth, UserI);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -122,15 +109,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((DL || !VTy->isPointerTy()) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVectorTy() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +158,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +182,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -225,9 +209,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -256,10 +240,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +278,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -336,10 +320,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne = RHSKnownOne | LHSKnownOne;
break;
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +407,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ // If this is a select as part of a min/max pattern, don't simplify any
+ // further in case we break the structure.
+ Value *LHS, *RHS;
+ if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN)
+ return nullptr;
+
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +435,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +461,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -483,8 +473,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +500,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -532,113 +522,35 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
- case Instruction::Add: {
- // Figure out what the input bits are. If the top bits of the and result
- // are not demanded, then the add doesn't demand them from its input
- // either.
+ case Instruction::Add:
+ case Instruction::Sub: {
+ // If the high bits of an ADD/SUB are not demanded, then we do not care
+ // about the high bits of the operands.
unsigned NLZ = DemandedMask.countLeadingZeros();
-
- // If there is a constant on the RHS, there are a variety of xformations
- // we can do.
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // If null, this should be simplified elsewhere. Some of the xforms here
- // won't work if the RHS is zero.
- if (RHS->isZero())
- break;
-
- // If the top bit of the output is demanded, demand everything from the
- // input. Otherwise, we demand all the input bits except NLZ top bits.
- APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ));
-
- // Find information about known zero/one bits in the input.
- if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
-
- // If the RHS of the add has bits set that can't affect the input, reduce
- // the constant.
- if (ShrinkDemandedConstant(I, 1, InDemandedBits))
- return I;
-
- // Avoid excess work.
- if (LHSKnownZero == 0 && LHSKnownOne == 0)
- break;
-
- // Turn it into OR if input bits are zero.
- if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) {
- Instruction *Or =
- BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
- I->getName());
- return InsertNewInstWith(Or, *I);
- }
-
- // We can say something about the output known-zero and known-one bits,
- // depending on potential carries from the input constant and the
- // unknowns. For example if the LHS is known to have at most the 0x0F0F0
- // bits set and the RHS constant is 0x01001, then we know we have a known
- // one mask of 0x00001 and a known zero mask of 0xE0F0E.
-
- // To compute this, we first compute the potential carry bits. These are
- // the bits which may be modified. I'm not aware of a better way to do
- // this scan.
- const APInt &RHSVal = RHS->getValue();
- APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal));
-
- // Now that we know which bits have carries, compute the known-1/0 sets.
-
- // Bits are known one if they are known zero in one operand and one in the
- // other, and there is no input carry.
- KnownOne = ((LHSKnownZero & RHSVal) |
- (LHSKnownOne & ~RHSVal)) & ~CarryBits;
-
- // Bits are known zero if they are known zero in both operands and there
- // is no input carry.
- KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;
- } else {
- // If the high-bits of this ADD are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this ADD to demand the most
- // significant bit and all those below it.
- APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
- return I;
- }
- }
- break;
- }
- case Instruction::Sub:
- // If the high-bits of this SUB are not demanded, then it does not demand
- // the high bits of its LHS or RHS.
- if (DemandedMask[BitWidth-1] == 0) {
- // Right fill the mask of bits for this SUB to demand the most
+ if (NLZ > 0) {
+ // Right fill the mask of bits for this ADD/SUB to demand the most
// significant bit and all those below it.
- uint32_t NLZ = DemandedMask.countLeadingZeros();
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
+ ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1)) {
+ // Disable the nsw and nuw flags here: We can no longer guarantee that
+ // we won't wrap after simplification. Removing the nsw/nuw flags is
+ // legal here because the top bit is not demanded.
+ BinaryOperator &BinOP = *cast<BinaryOperator>(I);
+ BinOP.setHasNoSignedWrap(false);
+ BinOP.setHasNoUnsignedWrap(false);
return I;
+ }
}
- // Otherwise just hand the sub off to computeKnownBits to fill in
+ // Otherwise just hand the add/sub off to computeKnownBits to fill in
// the known zeros and ones.
computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
-
- // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
- APInt I0 = C0->getValue();
- if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
- Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
- return InsertNewInstWith(Xor, *I);
- }
- }
break;
+ }
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
{
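A note on the flag clearing in the Add/Sub case above: a demanded-bits rewrite may change an operand's undemanded high bits, which invalidates any earlier no-wrap proof while leaving the demanded low bits intact. A standalone illustration (plain C++, not part of the patch; i8 wrap is simulated with casts, and the 0x70 rewrite is a hypothetical simplification):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Suppose only the low nibble of an i8 add is demanded (NLZ = 4).
      int8_t A = 0x0E, B = 0x02;        // 0x0E + 0x02 = 0x10: no signed wrap
      int8_t A2 = (int8_t)(A | 0x70);   // legal rewrite: bits 4..7 undemanded
      int8_t Sum2 = (int8_t)(A2 + B);   // 0x7E + 2 = 0x80 -> -128: wraps now
      // The demanded low nibble is unchanged, but 'nsw' no longer holds.
      std::printf("%d\n", (Sum2 & 0xF) == ((A + B) & 0xF)); // prints 1
    }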
@@ -662,8 +574,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
@@ -686,8 +598,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +643,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
@@ -772,8 +684,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
@@ -798,7 +710,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -808,10 +720,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
@@ -1051,7 +963,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
@@ -1069,7 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
@@ -1097,12 +1009,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
+ UndefElts4, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
+ UndefElts3, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
@@ -1152,12 +1064,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined.
@@ -1204,7 +1116,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
      // Simplify the input vector using the demanded source elements
      // computed above.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) {
I->setOperand(0, TmpV);
MadeChange = true;
@@ -1238,11 +1150,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Instruction::Sub:
case Instruction::Mul:
    // These ops compute each result element solely from the corresponding
    // operand elements, so the demanded elements pass straight through.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined. Consider things
@@ -1251,8 +1163,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
@@ -1273,10 +1185,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
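For reference, the Add special case deleted above rested on a carry scan over known bits: the bits a carry can reach are the XOR of the real sum with the carry-free sum. A standalone sketch (plain C++ on uint32_t, not part of the patch, assuming no LHS bits are known one) that reproduces the 0x00001/0xE0F0E example from the deleted comment:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t LHSKnownZero = 0xFFFF0F0Fu;  // LHS has at most 0x0000F0F0 set
      uint32_t RHSVal       = 0x00001001u;  // constant RHS of the add
      uint32_t MaxLHS       = ~LHSKnownZero;
      uint32_t CarryBits = (MaxLHS + RHSVal) ^ (MaxLHS ^ RHSVal);
      uint32_t KnownOne  = (LHSKnownZero & RHSVal) & ~CarryBits;  // 0x00000001
      uint32_t KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits;   // 0xFFFE0F0E
      std::printf("carry=%08X one=%08X zero=%08X\n", (unsigned)CarryBits,
                  (unsigned)KnownOne, (unsigned)KnownZero);
    }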
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cb16584..24446c8 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -12,7 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -201,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
@@ -732,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
@@ -853,10 +855,32 @@ static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
}
}
+// Returns true if the shuffle is extracting a contiguous range of values from
+// LHS, for example:
+//                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+//   Input:        |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
+//   Shuffles to:  |EE|FF|GG|HH|
+//                 +--+--+--+--+
+static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
+ SmallVector<int, 16> &Mask) {
+ unsigned LHSElems =
+ cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ unsigned EndIdx = Mask.back();
+ if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
+ return false;
+ for (unsigned I = 0; I != MaskElems; ++I)
+ if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
+ return false;
+ return true;
+}
+
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
SmallVector<int, 16> Mask = SVI.getShuffleMask();
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
bool MadeChange = false;
@@ -892,18 +916,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<Constant*, 16> Elts;
for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
if (Mask[i] < 0) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
continue;
}
if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
(Mask[i] < (int)e && isa<UndefValue>(LHS))) {
Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ Elts.push_back(UndefValue::get(Int32Ty));
} else {
Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
+ Elts.push_back(ConstantInt::get(Int32Ty, Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@@ -929,6 +952,95 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return ReplaceInstUsesWith(SVI, V);
}
+ // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
+ // a non-vector type. We can instead bitcast the original vector followed by
+ // an extract of the desired element:
+ //
+ // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
+ // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // %1 = bitcast <4 x i8> %sroa to i32
+ // Becomes:
+ // %bc = bitcast <16 x i8> %in to <4 x i32>
+ // %ext = extractelement <4 x i32> %bc, i32 0
+ //
+ // If the shuffle is extracting a contiguous range of values from the input
+ // vector then each use which is a bitcast of the extracted size can be
+ // replaced. This will work if the vector types are compatible, and the begin
+ // index is aligned to a value in the casted vector type. If the begin index
+ // isn't aligned then we can shuffle the original vector (keeping the same
+ // vector type) before extracting.
+ //
+ // This code will bail out if the target type is fundamentally incompatible
+ // with vectors of the source type.
+ //
+ // Example of <16 x i8>, target type i32:
+ // Index range [4,8):          v-----------v Will work.
+ //                 +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ //      <16 x i8>: |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
+ //      <4 x i32>: |           |           |           |           |
+ //                 +-----------+-----------+-----------+-----------+
+ // Index range [6,10):               ^-----------^ Needs an extra shuffle.
+ // Target type i40:            ^--------------^ Won't work, bail.
+ if (isShuffleExtractingFromLHS(SVI, Mask)) {
+ Value *V = LHS;
+ unsigned MaskElems = Mask.size();
+ unsigned BegIdx = Mask.front();
+ VectorType *SrcTy = cast<VectorType>(V->getType());
+ unsigned VecBitWidth = SrcTy->getBitWidth();
+ unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
+ assert(SrcElemBitWidth && "vector elements must have a bitwidth");
+ unsigned SrcNumElems = SrcTy->getNumElements();
+ SmallVector<BitCastInst *, 8> BCs;
+ DenseMap<Type *, Value *> NewBCs;
+ for (User *U : SVI.users())
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
+ // Only visit bitcasts that weren't previously handled.
+ if (!BC->use_empty())
+ BCs.push_back(BC);
+ for (BitCastInst *BC : BCs) {
+ Type *TgtTy = BC->getDestTy();
+ unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
+ if (!TgtElemBitWidth)
+ continue;
+ unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
+ bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
+ bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
+ if (!VecBitWidthsEqual)
+ continue;
+ if (!VectorType::isValidElementType(TgtTy))
+ continue;
+ VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
+ if (!BegIsAligned) {
+ // Shuffle the input so [0,MaskElems) contains the output, and
+ // [MaskElems,SrcNumElems) is undef.
+ SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
+ UndefValue::get(Int32Ty));
+ for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
+ ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
+ V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(ShuffleMask),
+ SVI.getName() + ".extract");
+ BegIdx = 0;
+ }
+ unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
+ assert(SrcElemsPerTgtElem);
+ BegIdx /= SrcElemsPerTgtElem;
+ bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ auto *NewBC =
+ BCAlreadyExists
+ ? NewBCs[CastSrcTy]
+ : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
+ if (!BCAlreadyExists)
+ NewBCs[CastSrcTy] = NewBC;
+ auto *Ext = Builder->CreateExtractElement(
+ NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ // The shufflevector isn't being replaced: the bitcast that used it
+ // is. InstCombine will visit the newly-created instructions.
+ ReplaceInstUsesWith(*BC, Ext);
+ MadeChange = true;
+ }
+ }
+
// If the LHS is a shufflevector itself, see if we can combine it with this
// one without producing an unusual shuffle.
// Cases that might be simplified:
@@ -1099,7 +1211,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
SmallVector<Constant*, 16> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
if (newMask[i] < 0) {
Elts.push_back(UndefValue::get(Int32Ty));
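The alignment arithmetic behind BegIsAligned and the final extract index in the fold above, reduced to a standalone sketch (plain C++, not part of the patch), using the <16 x i8> -> i32 figures from the comment:

    #include <cstdio>

    int main() {
      unsigned SrcElemBitWidth = 8;       // <16 x i8> source elements
      unsigned TgtElemBitWidth = 32;      // each bitcast use targets i32
      unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; // 4
      const unsigned Cases[] = {4, 6};    // mask begins at element 4 or 6
      for (unsigned BegIdx : Cases) {
        if ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth == 0)
          // [4,8): bitcast to <4 x i32>, then extract element 1.
          std::printf("extract index %u\n", BegIdx / SrcElemsPerTgtElem);
        else
          // [6,10): shuffle down to element 0 first, then extract.
          std::printf("index %u needs the extra shuffle\n", BegIdx);
      }
    }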
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
deleted file mode 100644
index 8d857d0..0000000
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-#define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define DEBUG_TYPE "instcombine"
-
-namespace llvm {
-
-/// InstCombineWorklist - This is the worklist management logic for
-/// InstCombine.
-class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
- SmallVector<Instruction*, 256> Worklist;
- DenseMap<Instruction*, unsigned> WorklistMap;
-
- void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION;
- InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION;
-public:
- InstCombineWorklist() {}
-
- bool isEmpty() const { return Worklist.empty(); }
-
- /// Add - Add the specified instruction to the worklist if it isn't already
- /// in it.
- void Add(Instruction *I) {
- if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
- Worklist.push_back(I);
- }
- }
-
- void AddValue(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- Add(I);
- }
-
- /// AddInitialGroup - Add the specified batch of stuff in reverse order,
- /// which should only be done when the worklist is empty and when the group
- /// has no duplicates.
- void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
- assert(Worklist.empty() && "Worklist must be empty to add initial group");
- Worklist.reserve(NumEntries+16);
- WorklistMap.resize(NumEntries);
- DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (unsigned Idx = 0; NumEntries; --NumEntries) {
- Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Idx++));
- Worklist.push_back(I);
- }
- }
-
- // Remove - remove I from the worklist if it exists.
- void Remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
- if (It == WorklistMap.end()) return; // Not in worklist.
-
- // Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = nullptr;
-
- WorklistMap.erase(It);
- }
-
- Instruction *RemoveOne() {
- Instruction *I = Worklist.pop_back_val();
- WorklistMap.erase(I);
- return I;
- }
-
- /// AddUsersToWorkList - When an instruction is simplified, add all users of
- /// the instruction to the work lists because they might get more simplified
- /// now.
- ///
- void AddUsersToWorkList(Instruction &I) {
- for (User *U : I.users())
- Add(cast<Instruction>(U));
- }
-
-
- /// Zap - check that the worklist is empty and nuke the backing store for
- /// the map if it is large.
- void Zap() {
- assert(WorklistMap.empty() && "Worklist empty, but map not?");
-
- // Do an explicit clear, this shrinks the map if needed.
- WorklistMap.clear();
- }
-};
-
-} // end namespace llvm.
-
-#undef DEBUG_TYPE
-
-#endif
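The header removed above is relocated as part of this reorganization (its new location is outside this diff). For readers skimming the deletion, the structure is a LIFO worklist with O(1) deduplication on insert and O(1) removal by nulling the slot; a minimal standalone analogue (plain C++, not the LLVM code):

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    using Item = int *; // stands in for llvm::Instruction *

    struct Worklist {
      std::vector<Item> Items;              // LIFO order, may contain nulls
      std::unordered_map<Item, size_t> Pos; // item -> slot, for dedup/removal

      void add(Item I) {                    // no-op if already queued
        if (Pos.emplace(I, Items.size()).second)
          Items.push_back(I);
      }
      void remove(Item I) {                 // null the slot, don't shift
        auto It = Pos.find(I);
        if (It == Pos.end())
          return;
        Items[It->second] = nullptr;
        Pos.erase(It);
      }
      Item pop() {                          // may return null; callers skip
        Item I = Items.back();
        Items.pop_back();
        if (I)
          Pos.erase(I);
        return I;
      }
      bool empty() const { return Items.empty(); }
    };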
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a0c239a..9d602c6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,8 +33,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "InstCombine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -43,8 +43,10 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
@@ -55,7 +57,8 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <climits>
@@ -72,35 +75,8 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
-// Initialization Routines
-void llvm::initializeInstCombine(PassRegistry &Registry) {
- initializeInstCombinerPass(Registry);
-}
-
-void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
- initializeInstCombine(*unwrap(R));
-}
-
-char InstCombiner::ID = 0;
-INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InstCombiner, "instcombine",
- "Combine redundant instructions", false, false)
-
-void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
-}
-
-
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -109,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have DL, we don't know if the source/dest are legal.
- if (!DL) return false;
-
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = DL->isLegalInteger(FromWidth);
- bool ToLegal = DL->isLegalInteger(ToWidth);
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -470,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout *DL, BinaryOperator &I,
+ const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
@@ -479,6 +452,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (!A || !C || !B || !D)
return nullptr;
+ Value *V = nullptr;
Value *SimplifiedInst = nullptr;
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
@@ -495,7 +469,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "A op' (B op D)".
// If "B op D" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
// If "B op D" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
if (!V && LHS->hasOneUse() && RHS->hasOneUse())
@@ -514,7 +488,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
std::swap(C, D);
// Consider forming "(A op C) op' B".
// If "A op C" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+ V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
// If "A op C" doesn't simplify then only go on if both of the existing
// operations "A op' B" and "C op' D" will be zapped as no longer used.
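Both symmetric branches above run distribution in reverse; stated as a trivial C++ identity (illustrative only, not the LLVM code):

    // Shared left factor (A == C):  (A op' B) op (A op' D)  ==>  A op' (B op D)
    // Shared right factor (B == D): (A op' B) op (C op' B)  ==>  (A op C) op' B
    int leftFactored(int A, int B, int D)  { return A * (B + D); }
    int rightFactored(int A, int B, int C) { return (A + C) * B; }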
@@ -544,7 +518,19 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
if (isa<OverflowingBinaryOperator>(Op1))
HasNSW &= Op1->hasNoSignedWrap();
- BO->setHasNoSignedWrap(HasNSW);
+
+ // We can propagate 'nsw' if we know that
+ // %Y = mul nsw i16 %X, C
+ // %Z = add nsw i16 %Y, %X
+ // =>
+ // %Z = mul nsw i16 %X, C+1
+ //
+ // iff C+1 isn't INT_MIN
+ const APInt *CInt;
+ if (TopLevelOpcode == Instruction::Add &&
+ InnerOpcode == Instruction::Mul)
+ if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
+ BO->setHasNoSignedWrap(HasNSW);
}
}
}
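The isMinSignedValue() guard above matters because the folded multiply can overflow where neither original instruction did. A standalone i16 demonstration (plain C++, not part of the patch; the i16 wrap is simulated with a cast):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t X = -1, C = INT16_MAX;       // %Y = mul nsw i16 %X, C
      int32_t Y = X * C;                   // -32767: fits in i16, no wrap
      int32_t Z = Y + X;                   // -32768 == INT16_MIN: still fits
      int32_t CPlus1 = (int16_t)(C + 1);   // wraps to INT16_MIN
      int32_t Folded = X * CPlus1;         // +32768: does not fit in i16
      std::printf("%d %d %d\n", Y, Z, Folded); // the folded mul broke nsw
    }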
@@ -741,6 +727,22 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
+ // Test if a CmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+ }
+ }
+
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
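The shape protected by the early-out above is the canonical compare-plus-select min/max; in C++ terms (illustrative analogue of the IR pattern):

    // icmp slt %a, %b + select %cmp, %a, %b -- the idiom SCEV and CodeGen match
    int smin(int a, int b) { return a < b ? a : b; }
    int smax(int a, int b) { return a < b ? b : a; }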
@@ -750,7 +752,6 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return nullptr;
}
-
/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which
/// has a PHI node as operand #0, see if we can fold the instruction into the
/// PHI (which is only possible if all operands to the PHI are constants).
@@ -799,8 +800,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
- if (isPotentiallyReachable(I.getParent(), NonConstBB, DT,
- getAnalysisIfAvailable<LoopInfo>()))
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, DT, LI))
return nullptr;
}
@@ -897,23 +897,18 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// whether or not there is a sequence of GEP indices into the pointed type that
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
- assert(PtrTy->isPtrOrPtrVectorTy());
-
- if (!DL)
- return nullptr;
-
- Type *Ty = PtrTy->getPointerElementType();
+Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+ SmallVectorImpl<Value *> &NewIndices) {
+ Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
@@ -931,11 +926,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -946,7 +941,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
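The struct/array walk above repeatedly converts a byte offset into a GEP index plus a smaller remainder; one round of the arithmetic as a standalone sketch (plain C++ with a fixed hypothetical layout, not the LLVM code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Offset 44 into [10 x {i32, i32}]: 8-byte structs, 4-byte fields.
      int64_t Offset = 44, StructSize = 8, FieldSize = 4;
      int64_t FirstIdx = Offset / StructSize;  // 5: index over the outer type
      Offset -= FirstIdx * StructSize;         // 4 bytes left inside element 5
      int64_t Field = Offset / FieldSize;      // 1: the second i32 member
      Offset -= Field * FieldSize;             // 0: landed exactly on a field
      std::printf("indices: %lld, %lld (rem %lld)\n", (long long)FirstIdx,
                  (long long)Field, (long long)Offset);
    }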
@@ -1240,7 +1235,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1326,37 +1322,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (DL) {
- bool MadeChange = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- // Skip indices into struct types.
- SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
- if (!SeqTy) continue;
-
- // If the element type has zero size then any index over it is equivalent
- // to an index of zero, so replace it with zero if it is not zero already.
- if (SeqTy->getElementType()->isSized() &&
- DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
- MadeChange = true;
- }
+ bool MadeChange = false;
+ Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy)
+ continue;
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
- // If we are using a wider index than needed for this platform, shrink
- // it to what we need. If narrower, sign-extend it to what we need.
- // This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
}
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
}
- if (MadeChange) return &GEP;
}
+ if (MadeChange)
+ return &GEP;
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1364,6 +1360,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op1)
return nullptr;
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
signed DI = -1;
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
@@ -1371,6 +1376,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
return nullptr;
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
// Keep track of the type as we walk the GEP.
Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
@@ -1417,8 +1426,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (DI == -1) {
// All the GEPs feeding the PHI are identical. Clone one down into our
// BB so that it can be merged with the current GEP.
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
} else {
// All the GEPs feeding the PHI differ at a single offset. Clone a GEP
// into the current block so it can be merged, and create a new PHI to
@@ -1434,8 +1443,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
PN->getIncomingBlock(I));
NewGEP->setOperand(DI, NewPN);
- GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
- NewGEP);
+ GEP.getParent()->getInstList().insert(
+ GEP.getParent()->getFirstInsertionPt(), NewGEP);
NewGEP->setOperand(DI, NewPN);
}
@@ -1486,6 +1495,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// normalized.
if (SO1->getType() != GO1->getType())
return nullptr;
+ // Only do the combine when GO1 and SO1 are both constants; only then
+ // is the cost after the merge guaranteed not to exceed the cost
+ // before the merge.
+ if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
+ return nullptr;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
@@ -1507,19 +1521,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
- GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
}
- if (DL && GEP.getNumIndices() == 1) {
+ if (GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- DL->getPointerSizeInBits(AS)) {
+ DL.getPointerSizeInBits(AS)) {
Type *PtrTy = GEP.getPointerOperandType();
Type *Ty = PtrTy->getPointerElementType();
- uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
bool Matched = false;
uint64_t C;
@@ -1588,8 +1605,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
- GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1613,6 +1630,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// is a leading zero) we can fold the cast into this GEP.
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
GEP.setOperand(0, StrippedPtr);
+ GEP.setSourceElementType(XATy);
return &GEP;
}
// Cannot replace the base pointer directly because StrippedPtr's
@@ -1625,9 +1643,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP = GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(
+ nullptr, StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1638,14 +1658,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (DL && SrcElTy->isArrayTy() &&
- DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- DL->getTypeAllocSize(ResElTy)) {
- Type *IdxType = DL->getIntPtrType(GEP.getType());
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1656,11 +1678,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1668,7 +1690,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1676,9 +1698,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1691,13 +1716,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
- SrcElTy->isArrayTy()) {
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize
- = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1705,7 +1729,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1714,13 +1738,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
- NewIdx
- };
-
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
+
+ Value *NewGEP = GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(
+ SrcElTy, StrippedPtr, Off, GEP.getName())
+ : Builder->CreateGEP(SrcElTy, StrippedPtr, Off,
+ GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1730,9 +1755,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- if (!DL)
- return nullptr;
-
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
@@ -1753,10 +1775,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset)) {
+ GEP.accumulateConstantOffset(DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1785,9 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// GEP.
SmallVector<Value*, 8> NewIndices;
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
- Value *NGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(Operand, NewIndices);
+ Value *NGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices)
+ : Builder->CreateGEP(nullptr, Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -1820,7 +1843,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
case Instruction::BitCast:
case Instruction::GetElementPtr:
- Users.push_back(I);
+ Users.emplace_back(I);
Worklist.push_back(I);
continue;
@@ -1829,7 +1852,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
// We can fold eq/ne comparisons with null to false/true, respectively.
if (!ICI->isEquality() || !isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
- Users.push_back(I);
+ Users.emplace_back(I);
continue;
}
@@ -1855,13 +1878,13 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::objectsize:
- Users.push_back(I);
+ Users.emplace_back(I);
continue;
}
}
if (isFreeCall(I, TLI)) {
- Users.push_back(I);
+ Users.emplace_back(I);
continue;
}
return false;
@@ -1870,7 +1893,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
StoreInst *SI = cast<StoreInst>(I);
if (SI->isVolatile() || SI->getPointerOperand() != PI)
return false;
- Users.push_back(I);
+ Users.emplace_back(I);
continue;
}
}
@@ -2038,6 +2061,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
@@ -2077,7 +2109,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
@@ -2096,8 +2128,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
  // x86 generates redundant zero-extension instructions if the operand is
// truncated to i8 or i16.
bool TruncCond = false;
- if (DL && BitWidth > NewWidth &&
- NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ if (NewWidth > 0 && BitWidth > NewWidth &&
+ NewWidth >= DL.getLargestLegalIntTypeSize()) {
TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
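NewWidth is defined outside this hunk; from the known-bits counts computed above it is presumably the number of condition bits not pinned down by known leading zeros or ones, roughly (an assumed reconstruction, not code from the patch):

    #include <algorithm>

    // Assumed shape of the value feeding TruncCond: how many low bits of the
    // switch condition can actually vary.
    unsigned neededWidth(unsigned BitWidth, unsigned LeadingKnownZeros,
                         unsigned LeadingKnownOnes) {
      return BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes);
    }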
@@ -2270,7 +2302,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// We need to insert these at the location of the old load, not at that of
// the extractvalue.
Builder->SetInsertPoint(L->getParent(), L);
- Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices);
+ Value *GEP = Builder->CreateInBoundsGEP(L->getType(),
+ L->getPointerOperand(), Indices);
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use ReplaceInstUsesWith().
return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP));
@@ -2286,41 +2319,27 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return nullptr;
}
-enum Personality_Type {
- Unknown_Personality,
- GNU_Ada_Personality,
- GNU_CXX_Personality,
- GNU_ObjC_Personality
-};
-
-/// RecognizePersonality - See if the given exception handling personality
-/// function is one that we understand. If so, return a description of it;
-/// otherwise return Unknown_Personality.
-static Personality_Type RecognizePersonality(Value *Pers) {
- Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
- if (!F)
- return Unknown_Personality;
- return StringSwitch<Personality_Type>(F->getName())
- .Case("__gnat_eh_personality", GNU_Ada_Personality)
- .Case("__gxx_personality_v0", GNU_CXX_Personality)
- .Case("__objc_personality_v0", GNU_ObjC_Personality)
- .Default(Unknown_Personality);
-}
-
/// isCatchAll - Return 'true' if the given typeinfo will match anything.
-static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
+static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
switch (Personality) {
- case Unknown_Personality:
+ case EHPersonality::GNU_C:
+ // The GCC C EH personality only exists to support cleanups, so it's not
+ // clear what the semantics of catch clauses are.
return false;
- case GNU_Ada_Personality:
+ case EHPersonality::Unknown:
+ return false;
+ case EHPersonality::GNU_Ada:
// While __gnat_all_others_value will match any Ada exception, it doesn't
// match foreign exceptions (or didn't, before gcc-4.7).
return false;
- case GNU_CXX_Personality:
- case GNU_ObjC_Personality:
+ case EHPersonality::GNU_CXX:
+ case EHPersonality::GNU_ObjC:
+ case EHPersonality::MSVC_X86SEH:
+ case EHPersonality::MSVC_Win64SEH:
+ case EHPersonality::MSVC_CXX:
return TypeInfo->isNullValue();
}
- llvm_unreachable("Unknown personality!");
+ llvm_unreachable("invalid enum");
}
static bool shorter_filter(const Value *LHS, const Value *RHS) {
@@ -2334,7 +2353,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// The logic here should be correct for any real-world personality function.
// However if that turns out not to be true, the offending logic can always
// be conditioned on the personality function, like the catch-all logic is.
- Personality_Type Personality = RecognizePersonality(LI.getPersonalityFn());
+ EHPersonality Personality = classifyEHPersonality(LI.getPersonalityFn());
// Simplify the list of clauses, eg by removing repeated catch clauses
// (these are often created by inlining).
@@ -2625,9 +2644,6 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-
-
-
/// TryToSinkInstruction - Try to move the specified instruction from its
/// current block into the beginning of DestBlock, which can only happen if it's
/// safe to move the instruction past all of the instructions between it and the
@@ -2660,164 +2676,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
return true;
}
-
-/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
-/// all reachable code to the worklist.
-///
-/// This has a couple of tricks to make the code faster and more powerful. In
-/// particular, we constant fold and DCE instructions as we go, to avoid adding
-/// them to the worklist (this significantly speeds up instcombine on code where
-/// many instructions are dead or constant). Additionally, if we find a branch
-/// whose condition is a known constant, we only visit the reachable successors.
-///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSetImpl<BasicBlock*> &Visited,
- InstCombiner &IC,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- bool MadeIRChange = false;
- SmallVector<BasicBlock*, 256> Worklist;
- Worklist.push_back(BB);
-
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
- DenseMap<ConstantExpr*, Constant*> FoldedConstants;
-
- do {
- BB = Worklist.pop_back_val();
-
- // We have now visited this block! If we've already been here, ignore it.
- if (!Visited.insert(BB).second)
- continue;
-
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = BBI++;
-
- // DCE instruction if trivially dead.
- if (isInstructionTriviallyDead(Inst, TLI)) {
- ++NumDeadInst;
- DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
- Inst->eraseFromParent();
- continue;
- }
-
- // ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
- << *Inst << '\n');
- Inst->replaceAllUsesWith(C);
- ++NumConstProp;
- Inst->eraseFromParent();
- continue;
- }
-
- if (DL) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr) continue;
-
- Constant*& FoldRes = FoldedConstants[CE];
- if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
- if (!FoldRes)
- FoldRes = CE;
-
- if (FoldRes != CE) {
- *i = FoldRes;
- MadeIRChange = true;
- }
- }
- }
-
- InstrsForInstCombineWorklist.push_back(Inst);
- }
-
- // Recursively visit successors. If this is a branch or switch on a
- // constant, only visit the reachable successor.
- TerminatorInst *TI = BB->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
- bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
- BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
- Worklist.push_back(ReachableBB);
- continue;
- }
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- // See if this is an explicit destination.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseValue() == Cond) {
- BasicBlock *ReachableBB = i.getCaseSuccessor();
- Worklist.push_back(ReachableBB);
- continue;
- }
-
- // Otherwise it is the default destination.
- Worklist.push_back(SI->getDefaultDest());
- continue;
- }
- }
-
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- Worklist.push_back(TI->getSuccessor(i));
- } while (!Worklist.empty());
-
- // Once we've found all of the instructions to add to instcombine's worklist,
- // add them in reverse order. This way instcombine will visit from the top
- // of the function down. This jives well with the way that it adds all uses
- // of instructions to the worklist after doing a transformation, thus avoiding
- // some N^2 behavior in pathological cases.
- IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
- InstrsForInstCombineWorklist.size());
-
- return MadeIRChange;
-}
-
-bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
- MadeIRChange = false;
-
- DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getName() << "\n");
-
- {
- // Do a depth-first traversal of the function, populate the worklist with
- // the reachable instructions. Ignore blocks that are not reachable. Keep
- // track of which blocks we visit.
- SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, DL,
- TLI);
-
- // Do a quick scan over the function. If we find any blocks that are
- // unreachable, remove any instructions inside of them. This prevents
- // the instcombine code from having to deal with some bad special cases.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Visited.count(BB)) continue;
-
- // Delete the instructions backwards, as it has a reduced likelihood of
- // having to update as many def-use and use-def chains.
- Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
- while (EndInst != BB->begin()) {
- // Delete the next to last instruction.
- BasicBlock::iterator I = EndInst;
- Instruction *Inst = --I;
- if (!Inst->use_empty())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- if (isa<LandingPadInst>(Inst)) {
- EndInst = Inst;
- continue;
- }
- if (!isa<DbgInfoIntrinsic>(Inst)) {
- ++NumDeadInst;
- MadeIRChange = true;
- }
- Inst->eraseFromParent();
- }
- }
- }
-
+bool InstCombiner::run() {
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
if (I == nullptr) continue; // skip null values.
@@ -2832,7 +2691,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2843,6 +2702,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = true;
continue;
}
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -2900,7 +2760,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
DEBUG(dbgs() << "IC: Old = " << *I << '\n'
<< " New = " << *Result << '\n');
- if (!I->getDebugLoc().isUnknown())
+ if (I->getDebugLoc())
Result->setDebugLoc(I->getDebugLoc());
// Everything uses the new instruction now.
I->replaceAllUsesWith(Result);
@@ -2947,63 +2807,287 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
return MadeIRChange;
}
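
The loop above is a plain worklist fixpoint: pop one instruction, try a rewrite, and re-queue anything that may now simplify further, stopping once the list goes quiet. A minimal standalone sketch of that driver shape — `Inst` and `runToFixpoint` are toy stand-ins, not LLVM's real types:

    #include <cstdio>
    #include <vector>

    // Toy "instruction": a value plus a flag saying whether another rewrite
    // could still apply to it.
    struct Inst { int Value; bool Simplifiable; };

    // Worklist fixpoint in the shape of InstCombiner::run(): pop, skip null
    // or uninteresting entries, rewrite, re-queue what may simplify further.
    static bool runToFixpoint(std::vector<Inst *> &Worklist) {
      bool MadeChange = false;
      while (!Worklist.empty()) {
        Inst *I = Worklist.back();
        Worklist.pop_back();
        if (I == nullptr || !I->Simplifiable)
          continue;                     // skip null values, as run() does
        I->Value /= 2;                  // stand-in for one "combine" rewrite
        I->Simplifiable = I->Value > 1; // could it be rewritten again?
        if (I->Simplifiable)
          Worklist.push_back(I);        // revisit, like re-adding changed users
        MadeChange = true;
      }
      return MadeChange;
    }

    int main() {
      Inst A{16, true}, B{3, true};
      std::vector<Inst *> WL = {&A, &B};
      bool Changed = runToFixpoint(WL);
      std::printf("changed=%d A=%d B=%d\n", Changed, A.Value, B.Value);
    }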
-namespace {
-class InstCombinerLibCallSimplifier final : public LibCallSimplifier {
- InstCombiner *IC;
-public:
- InstCombinerLibCallSimplifier(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- InstCombiner *IC)
- : LibCallSimplifier(DL, TLI) {
- this->IC = IC;
- }
+/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding
+/// all reachable code to the worklist.
+///
+/// This has a couple of tricks to make the code faster and more powerful. In
+/// particular, we constant fold and DCE instructions as we go, to avoid adding
+/// them to the worklist (this significantly speeds up instcombine on code where
+/// many instructions are dead or constant). Additionally, if we find a branch
+/// whose condition is a known constant, we only visit the reachable successors.
+///
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ InstCombineWorklist &ICWorklist,
+ const TargetLibraryInfo *TLI) {
+ bool MadeIRChange = false;
+ SmallVector<BasicBlock*, 256> Worklist;
+ Worklist.push_back(BB);
- /// replaceAllUsesWith - override so that instruction replacement
- /// can be defined in terms of the instruction combiner framework.
- void replaceAllUsesWith(Instruction *I, Value *With) const override {
- IC->ReplaceInstUsesWith(*I, With);
- }
-};
+ SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+
+ do {
+ BB = Worklist.pop_back_val();
+
+ // We have now visited this block! If we've already been here, ignore it.
+ if (!Visited.insert(BB).second)
+ continue;
+
+ for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+ Instruction *Inst = BBI++;
+
+ // DCE instruction if trivially dead.
+ if (isInstructionTriviallyDead(Inst, TLI)) {
+ ++NumDeadInst;
+ DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // ConstantProp instruction if trivially constant.
+ if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
+ if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
+ DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
+ << *Inst << '\n');
+ Inst->replaceAllUsesWith(C);
+ ++NumConstProp;
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
+
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
+
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
+ }
+ }
+
+ InstrsForInstCombineWorklist.push_back(Inst);
+ }
+
+ // Recursively visit successors. If this is a branch or switch on a
+ // constant, only visit the reachable successor.
+ TerminatorInst *TI = BB->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
+ bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ // See if this is an explicit destination.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
+ Worklist.push_back(ReachableBB);
+ continue;
+ }
+
+ // Otherwise it is the default destination.
+ Worklist.push_back(SI->getDefaultDest());
+ continue;
+ }
+ }
+
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ Worklist.push_back(TI->getSuccessor(i));
+ } while (!Worklist.empty());
+
+ // Once we've found all of the instructions to add to instcombine's worklist,
+ // add them in reverse order. This way instcombine will visit from the top
+ // of the function down. This jibes well with the way that it adds all uses
+ // of instructions to the worklist after doing a transformation, thus avoiding
+ // some N^2 behavior in pathological cases.
+ ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0],
+ InstrsForInstCombineWorklist.size());
+
+ return MadeIRChange;
}
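
The successor pruning in AddReachableCodeToWorklist is the interesting trick: when a conditional branch tests a known constant, only the taken edge is walked, so entirely dead regions never reach the combiner at all. A standalone C++17 sketch of that walk, with a hypothetical `Block` type standing in for `BasicBlock` (`Succs[0]` as the true edge, matching `getSuccessor(0)`):

    #include <optional>
    #include <unordered_set>
    #include <vector>

    struct Block {
      std::vector<Block *> Succs;    // Succs[0] = true edge, Succs[1] = false
      std::optional<bool> ConstCond; // engaged => branch condition is constant
    };

    // Depth-first walk that only follows the taken edge when the branch
    // condition is a known constant, mirroring the pruning above.
    static void addReachable(Block *Entry, std::unordered_set<Block *> &Visited) {
      std::vector<Block *> Worklist{Entry};
      do {
        Block *BB = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(BB).second)
          continue;                          // already visited this block
        if (BB->ConstCond && BB->Succs.size() == 2) {
          // Branch on a known constant: only the taken edge is reachable.
          Worklist.push_back(BB->Succs[*BB->ConstCond ? 0 : 1]);
          continue;
        }
        for (Block *S : BB->Succs)
          Worklist.push_back(S);
      } while (!Worklist.empty());
    }

    int main() {
      Block Dead, Live, Entry;
      Entry.Succs = {&Live, &Dead};
      Entry.ConstCond = true;                // always takes the true edge
      std::unordered_set<Block *> Visited;
      addReachable(&Entry, Visited);
      return Visited.count(&Dead) ? 1 : 0;   // 0: Dead was pruned
    }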
-bool InstCombiner::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
- return false;
+/// \brief Populate the IC worklist from a function, and prune any dead basic
+/// blocks discovered in the process.
+///
+/// This also does basic constant propagation and other forward fixing to make
+/// the combiner itself run much faster.
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
+ TargetLibraryInfo *TLI,
+ InstCombineWorklist &ICWorklist) {
+ bool MadeIRChange = false;
+
+ // Do a depth-first traversal of the function, populate the worklist with
+ // the reachable instructions. Ignore blocks that are not reachable. Keep
+ // track of which blocks we visit.
+ SmallPtrSet<BasicBlock *, 64> Visited;
+ MadeIRChange |=
+ AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
+
+ // Do a quick scan over the function. If we find any blocks that are
+ // unreachable, remove any instructions inside of them. This prevents
+ // the instcombine code from having to deal with some bad special cases.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (Visited.count(BB))
+ continue;
+
+ // Delete the instructions back to front, so an instruction's in-block users
+ // are gone before it is erased and fewer def-use/use-def chains need updating.
+ Instruction *EndInst = BB->getTerminator(); // Last not to be deleted.
+ while (EndInst != BB->begin()) {
+ // Delete the next to last instruction.
+ BasicBlock::iterator I = EndInst;
+ Instruction *Inst = --I;
+ if (!Inst->use_empty())
+ Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
+ if (isa<LandingPadInst>(Inst)) {
+ EndInst = Inst;
+ continue;
+ }
+ if (!isa<DbgInfoIntrinsic>(Inst)) {
+ ++NumDeadInst;
+ MadeIRChange = true;
+ }
+ Inst->eraseFromParent();
+ }
+ }
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfo>();
+ return MadeIRChange;
+}
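
The unreachable-block cleanup above erases back to front so that each instruction's in-block users are already gone by the time it is erased, which is why little use-list fixup is needed. A toy model of that back-to-front erase, keeping the final "terminator" entry, with `std::list` standing in for a block's instruction list:

    #include <cstdio>
    #include <list>
    #include <string>

    // Erase everything before the terminator, last-to-first. std::list
    // iterators stay valid when elements before them are erased.
    static unsigned deleteDeadBlock(std::list<std::string> &BB) {
      unsigned NumDeleted = 0;
      auto End = std::prev(BB.end());   // the terminator: last not to be deleted
      while (End != BB.begin()) {
        BB.erase(std::prev(End));       // erase the instruction just before End
        ++NumDeleted;
      }
      return NumDeleted;
    }

    int main() {
      std::list<std::string> BB = {"a", "b", "c", "br"};
      unsigned N = deleteDeadBlock(BB);
      std::printf("deleted=%u left=%zu\n", N, BB.size()); // deleted=3 left=1
    }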
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, LoopInfo *LI = nullptr) {
// Minimizing size?
- MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
+ auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- IRBuilder<true, TargetFolder, InstCombineIRInserter> TheBuilder(
- F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, AC));
- Builder = &TheBuilder;
-
- InstCombinerLibCallSimplifier TheSimplifier(DL, TLI, this);
- Simplifier = &TheSimplifier;
-
- bool EverMadeChange = false;
+ IRBuilder<true, TargetFolder, InstCombineIRInserter> Builder(
+ F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, &AC));
// Lower dbg.declare intrinsics otherwise their value may be clobbered
// by instcombiner.
- EverMadeChange = LowerDbgDeclare(F);
+ bool DbgDeclaresChanged = LowerDbgDeclare(F);
// Iterate while there is work to do.
- unsigned Iteration = 0;
- while (DoOneIteration(F, Iteration++))
- EverMadeChange = true;
+ int Iteration = 0;
+ for (;;) {
+ ++Iteration;
+ DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+ << F.getName() << "\n");
+
+ bool Changed = false;
+ if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist))
+ Changed = true;
+
+ InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI);
+ if (IC.run())
+ Changed = true;
+
+ if (!Changed)
+ break;
+ }
+
+ return DbgDeclaresChanged || Iteration > 1;
+}
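
combineInstructionsOverFunction drives the prepare/combine pair to a fixed point: each iteration reports whether it changed the IR, the first quiet iteration ends the loop, and the function therefore changed the IR exactly when it ran more than once. A compact sketch of that driver, with hypothetical `prepareStep`/`combineStep` stand-ins:

    #include <cstdio>

    static int Budget = 3;                      // pretend 3 rounds find work
    static bool prepareStep() { return false; } // stand-in: worklist population
    static bool combineStep() { return Budget-- > 0; } // stand-in: IC.run()

    static bool combineToFixpoint() {
      int Iteration = 0;
      for (;;) {
        ++Iteration;
        bool Changed = false;
        if (prepareStep())
          Changed = true;
        if (combineStep())
          Changed = true;
        if (!Changed)
          break;                     // first quiet iteration ends the loop
      }
      return Iteration > 1;          // mirrors the "Iteration > 1" test above
    }

    int main() { std::printf("changed=%d\n", combineToFixpoint()); }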
+
+PreservedAnalyses InstCombinePass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ auto &AC = AM->getResult<AssumptionAnalysis>(F);
+ auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
+
+ auto *LI = AM->getCachedResult<LoopAnalysis>(F);
+
+ if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
+ // No changes, all analyses are preserved.
+ return PreservedAnalyses::all();
+
+ // Mark all the analyses that instcombine updates as preserved.
+ // FIXME: Need a way to preserve CFG analyses here!
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
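
The new-pass-manager entry point follows the PreservedAnalyses contract: report "all" when the IR is untouched, otherwise name only what is still known valid (here the dominator tree, since instcombine does not edit the CFG). A toy model of that contract — this is not the real llvm::PreservedAnalyses API:

    #include <set>
    #include <string>

    // An unchanged function preserves everything; otherwise only analyses the
    // pass explicitly names survive, and the manager invalidates the rest.
    struct Preserved {
      bool All = false;
      std::set<std::string> Kept;
      static Preserved all() { Preserved P; P.All = true; return P; }
      bool preserved(const std::string &A) const { return All || Kept.count(A); }
    };

    static Preserved runToyPass(bool MadeIRChange) {
      if (!MadeIRChange)
        return Preserved::all();      // no change: every analysis stays valid
      Preserved PA;
      PA.Kept.insert("DominatorTreeAnalysis"); // the CFG was left intact
      return PA;
    }

    int main() { return runToyPass(true).preserved("LoopAnalysis"); } // 0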
+
+namespace {
+/// \brief The legacy pass manager's instcombine pass.
+///
+/// This is a basic whole-function wrapper around the instcombine utility. It
+/// will try to combine all instructions in the function.
+class InstructionCombiningPass : public FunctionPass {
+ InstCombineWorklist Worklist;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ InstructionCombiningPass() : FunctionPass(ID) {
+ initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+bool InstructionCombiningPass::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
+
+ // Required analyses.
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+ // Optional analyses.
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- Builder = nullptr;
- return EverMadeChange;
+ return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
+}
+
+char InstructionCombiningPass::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
+ "Combine redundant instructions", false, false)
+
+// Initialization Routines
+void llvm::initializeInstCombine(PassRegistry &Registry) {
+ initializeInstructionCombiningPassPass(Registry);
+}
+
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
+ initializeInstructionCombiningPassPass(*unwrap(R));
}
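
The C binding above is normally driven through the global registry. A minimal usage sketch; the header locations (`llvm-c/Core.h` for `LLVMGetGlobalPassRegistry`, `llvm-c/Initialization.h` for `LLVMInitializeInstCombine`) are believed right for this snapshot but should be treated as assumptions:

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"

    int main(void) {
      // Register instcombine with the global registry before building pass
      // managers through the C API.
      LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
      LLVMInitializeInstCombine(R);
      return 0;
    }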
FunctionPass *llvm::createInstructionCombiningPass() {
- return new InstCombiner();
+ return new InstructionCombiningPass();
}