summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp410
1 files changed, 272 insertions, 138 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 83b4b82..6de380b 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -11,16 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "InstCombine.h"
+#include "InstCombineInternal.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
using namespace llvm;
using namespace PatternMatch;
@@ -60,8 +61,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -107,7 +108,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -155,7 +156,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -197,11 +198,137 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+static Value *SimplifyX86insertps(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
+
+ // The immediate permute control byte looks like this:
+ // [3:0] - zero mask for each 32-bit lane
+ // [5:4] - select one 32-bit destination lane
+ // [7:6] - select one 32-bit source lane
+
+ uint8_t Imm = CInt->getZExtValue();
+ uint8_t ZMask = Imm & 0xf;
+ uint8_t DestLane = (Imm >> 4) & 0x3;
+ uint8_t SourceLane = (Imm >> 6) & 0x3;
+
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // If all zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (ZMask == 0xf)
+ return ZeroVector;
+
+ // Initialize by passing all of the first source bits through.
+ int ShuffleMask[4] = { 0, 1, 2, 3 };
+
+ // We may replace the second operand with the zero vector.
+ Value *V1 = II.getArgOperand(1);
+
+ if (ZMask) {
+ // If the zero mask is being used with a single input or the zero mask
+ // overrides the destination lane, this is a shuffle with the zero vector.
+ if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+ (ZMask & (1 << DestLane))) {
+ V1 = ZeroVector;
+ // We may still move 32-bits of the first source vector from one lane
+ // to another.
+ ShuffleMask[DestLane] = SourceLane;
+ // The zero mask may override the previous insert operation.
+ for (unsigned i = 0; i < 4; ++i)
+ if ((ZMask >> i) & 0x1)
+ ShuffleMask[i] = i + 4;
+ } else {
+ // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+ return nullptr;
+ }
+ } else {
+ // Replace the selected destination lane with the selected source lane.
+ ShuffleMask[DestLane] = SourceLane + 4;
+ }
+
+ return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
+ auto Args = CI.arg_operands();
+ if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
+ TLI, DT, AC))
+ return ReplaceInstUsesWith(CI, V);
+
if (isFreeCall(&CI, TLI))
return visitFree(CI);
@@ -350,112 +477,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true);
- }
- // FALL THROUGH uadd into sadd
+
+ case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- // Canonicalize constants into the RHS.
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
if (isa<Constant>(II->getArgOperand(0)) &&
!isa<Constant>(II->getArgOperand(1))) {
+ // Canonicalize constants into the RHS.
Value *LHS = II->getArgOperand(0);
II->setArgOperand(0, II->getArgOperand(1));
II->setArgOperand(1, LHS);
return II;
}
+ // fall through
- // X + undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X + 0 -> {X, false}
- if (RHS->isZero()) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
-
- // We can strength reduce reduce this signed add into a regular add if we
- // can prove that it will never overflow.
- if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
- }
- }
-
- break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- // undef - X -> undef
- // X - undef -> undef
- if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
- if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
- // X - 0 -> {X, false}
- if (ConstRHS->isZero()) {
- return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
- }
- } else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
- }
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II);
- if (OR == OverflowResult::NeverOverflows)
- return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false);
- if (OR == OverflowResult::AlwaysOverflows)
- return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true);
- } // FALL THROUGH
- case Intrinsic::smul_with_overflow:
- // Canonicalize constants into the RHS.
- if (isa<Constant>(II->getArgOperand(0)) &&
- !isa<Constant>(II->getArgOperand(1))) {
- Value *LHS = II->getArgOperand(0);
- II->setArgOperand(0, II->getArgOperand(1));
- II->setArgOperand(1, LHS);
- return II;
- }
-
- // X * undef -> undef
- if (isa<UndefValue>(II->getArgOperand(1)))
- return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+ OverflowCheckFlavor OCF =
+ IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
+ assert(OCF != OCF_INVALID && "unexpected!");
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
- // X*0 -> {0, false}
- if (RHSI->isZero())
- return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
+ Value *OperationResult = nullptr;
+ Constant *OverflowResult = nullptr;
+ if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
+ *II, OperationResult, OverflowResult))
+ return CreateOverflowTuple(II, OperationResult, OverflowResult);
- // X * 1 -> {X, false}
- if (RHSI->equalsInt(1)) {
- return CreateOverflowTuple(II, II->getArgOperand(0), false,
- /*ReUseName*/false);
- }
- }
- if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
- Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
- return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
- }
- }
break;
+ }
+
case Intrinsic::minnum:
case Intrinsic::maxnum: {
Value *Arg0 = II->getArgOperand(0);
@@ -543,7 +594,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -560,7 +611,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -575,11 +626,54 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
}
+ case Intrinsic::ppc_qpx_qvlfs:
+ // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getType()->getVectorNumElements());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(VTy));
+ Value *Load = Builder->CreateLoad(Ptr);
+ return new FPExtInst(Load, II->getType());
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
+ 32) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
+ PointerType::getUnqual(II->getType()));
+ return new LoadInst(Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
+ 16) {
+ Type *VTy = VectorType::get(Builder->getFloatTy(),
+ II->getArgOperand(0)->getType()->getVectorNumElements());
+ Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
+ Type *OpPtrTy = PointerType::getUnqual(VTy);
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(TOp, Ptr);
+ }
+ break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
+ 32) {
+ Type *OpPtrTy =
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
+ }
+ break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -696,15 +790,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
break;
}
-
+ case Intrinsic::x86_sse41_insertps:
+ if (Value *V = SimplifyX86insertps(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::x86_sse4a_insertqi: {
// insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
// ones undef
@@ -867,6 +964,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -906,12 +1011,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -940,7 +1045,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1079,7 +1184,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1102,7 +1207,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// facts about the relocate value, while being careful to
// preserve relocation semantics.
GCRelocateOperands Operands(II);
- Value *DerivedPtr = Operands.derivedPtr();
+ Value *DerivedPtr = Operands.getDerivedPtr();
+ auto *GCRelocateType = cast<PointerType>(II->getType());
// Remove the relocation if unused, note that this check is required
// to prevent the cases below from looping forever.
@@ -1113,24 +1219,34 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: provide a hook for this in GCStrategy. This is clearly legal for
// most practical collectors, but there was discussion in the review thread
// about whether it was legal for all possible collectors.
- if (isa<UndefValue>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<UndefValue>(DerivedPtr)) {
+ // gc_relocate is uncasted. Use undef of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, UndefValue::get(GCRelocateType));
+ }
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some weird
// collector this property does not hold for.
- if (isa<ConstantPointerNull>(DerivedPtr))
- return ReplaceInstUsesWith(*II, DerivedPtr);
+ if (isa<ConstantPointerNull>(DerivedPtr)) {
+ // gc_relocate is uncasted. Use null-pointer of gc_relocate's type to replace it.
+ return ReplaceInstUsesWith(*II, ConstantPointerNull::get(GCRelocateType));
+ }
// isKnownNonNull -> nonnull attribute
if (isKnownNonNull(DerivedPtr))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- // TODO: dereferenceable -> deref attribute
+ // isDereferenceablePointer -> deref attribute
+ if (isDereferenceablePointer(DerivedPtr, DL)) {
+ if (Argument *A = dyn_cast<Argument>(DerivedPtr)) {
+ uint64_t Bytes = A->getDereferenceableBytes();
+ II->addDereferenceableAttr(AttributeSet::ReturnIndex, Bytes);
+ }
+ }
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
-
+
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
}
}
@@ -1147,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1172,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1181,10 +1297,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
- if (Value *With = Simplifier->optimizeCall(CI)) {
+ auto InstCombineRAUW = [this](Instruction *From, Value *With) {
+ ReplaceInstUsesWith(*From, With);
+ };
+ LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
@@ -1271,11 +1391,29 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
+
if (isAllocLikeFn(CS.getInstruction(), TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
+ // Mark any parameters that are known to be non-null with the nonnull
+ // attribute. This is helpful for inlining calls to functions with null
+ // checks on their arguments.
+ unsigned ArgNo = 0;
+ for (Value *V : CS.args()) {
+ if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) &&
+ isKnownNonNull(V)) {
+ AttributeSet AS = CS.getAttributes();
+ AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1,
+ Attribute::NonNull);
+ CS.setAttributes(AS);
+ Changed = true;
+ }
+ ArgNo++;
+ }
+ assert(ArgNo == CS.arg_size() && "sanity check");
+
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
@@ -1342,7 +1480,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1359,7 +1497,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1409,10 +1547,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
- if (RAttrs.
- hasAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex))
+ if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -1438,7 +1573,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1450,8 +1588,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false; // Cannot transform this parameter value.
if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- hasAttributes(AttributeFuncs::
- typeIncompatible(ParamTy, i + 1), i + 1))
+ overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
@@ -1463,12 +1600,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
@@ -1524,10 +1661,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
- RAttrs.
- removeAttributes(AttributeFuncs::
- typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
- AttributeSet::ReturnIndex);
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
// Add the new return attributes.
if (RAttrs.hasAttributes())
OpenPOWER on IntegriCloud