diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine')
15 files changed, 2469 insertions, 842 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h index 7467eca..1f6a3a5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombine.h @@ -11,12 +11,12 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Support/InstVisitor.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" #include "llvm/Support/TargetFolder.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" @@ -27,7 +27,7 @@ namespace llvm { class DbgDeclareInst; class MemIntrinsic; class MemSetInst; - + /// SelectPatternFlavor - We can match a variety of different patterns for /// select operations. enum SelectPatternFlavor { @@ -36,7 +36,7 @@ enum SelectPatternFlavor { SPF_SMAX, SPF_UMAX //SPF_ABS - TODO. }; - + /// getComplexity: Assign a complexity or rank value to LLVM Values... /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static inline unsigned getComplexity(Value *V) { @@ -51,23 +51,23 @@ static inline unsigned getComplexity(Value *V) { return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; } - + /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter<true> { InstCombineWorklist &Worklist; public: InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} - + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const { IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); Worklist.Add(I); } }; - + /// InstCombiner - The -instcombine pass. class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, @@ -76,6 +76,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner TargetLibraryInfo *TLI; bool MadeIRChange; LibCallSimplifier *Simplifier; + bool MinimizeSize; public: /// Worklist - All of the instructions that need to be simplified. InstCombineWorklist Worklist; @@ -84,15 +85,16 @@ public: /// instructions into the worklist when they are created. typedef IRBuilder<true, TargetFolder, InstCombineIRInserter> BuilderTy; BuilderTy *Builder; - + static char ID; // Pass identification, replacement for typeid InstCombiner() : FunctionPass(ID), TD(0), Builder(0) { + MinimizeSize = false; initializeInstCombinerPass(*PassRegistry::getPassRegistry()); } public: virtual bool runOnFunction(Function &F); - + bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -114,6 +116,8 @@ public: Instruction *visitSub(BinaryOperator &I); Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); + Value *foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, + Instruction *InsertBefore); Instruction *visitFMul(BinaryOperator &I); Instruction *visitURem(BinaryOperator &I); Instruction *visitSRem(BinaryOperator &I); @@ -207,11 +211,11 @@ public: private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; - Value *dyn_castFNegVal(Value *V) const; - Type *FindElementAtOffset(Type *Ty, int64_t Offset, + Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; + Type *FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually /// results in any code being generated and is interesting to optimize out. If /// the cast can be eliminated by some other simple transformation, we prefer @@ -243,7 +247,7 @@ public: return New; } - // InsertNewInstWith - same as InsertNewInstBefore, but also sets the + // InsertNewInstWith - same as InsertNewInstBefore, but also sets the // debug loc. // Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { @@ -259,10 +263,10 @@ public: // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. - + // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. - if (&I == V) + if (&I == V) V = UndefValue::get(I.getType()); DEBUG(errs() << "IC: Replacing " << I << "\n" @@ -292,13 +296,13 @@ public: MadeIRChange = true; return 0; // Don't do anything with FI } - + void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Depth = 0) const { return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); } - - bool MaskedValueIsZero(Value *V, const APInt &Mask, + + bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0) const { return llvm::MaskedValueIsZero(V, Mask, TD, Depth); } @@ -321,21 +325,26 @@ private: /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt& KnownZero, APInt& KnownOne, unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt& KnownZero, APInt& KnownOne, unsigned Depth=0); - + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded + /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. + Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, + APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne); + /// SimplifyDemandedInstructionBits - Inst is an integer instruction that /// SimplifyDemandedBits knows about. See if the instruction has any /// properties that allow us to simplify its operands. bool SimplifyDemandedInstructionBits(Instruction &Inst); - + Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt& UndefElts, unsigned Depth = 0); - + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction // into the PHI (which is only possible if all operands to the PHI are @@ -351,10 +360,10 @@ private: Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - + Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); - + Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, @@ -373,8 +382,8 @@ private: Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; - - + + } // end namespace llvm. #endif diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d8257e6..7595da0 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -13,16 +13,840 @@ #include "InstCombine.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; +namespace { + + /// Class representing coefficient of floating-point addend. + /// This class needs to be highly efficient, which is especially true for + /// the constructor. As of I write this comment, the cost of the default + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// perform write-merging). + /// + class FAddendCoef { + public: + // The constructor has to initialize a APFloat, which is uncessary for + // most addends which have coefficient either 1 or -1. So, the constructor + // is expensive. In order to avoid the cost of the constructor, we should + // reuse some instances whenever possible. The pre-created instances + // FAddCombine::Add[0-5] embodies this idea. + // + FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} + ~FAddendCoef(); + + void set(short C) { + assert(!insaneIntVal(C) && "Insane coefficient"); + IsFp = false; IntVal = C; + } + + void set(const APFloat& C); + + void negate(); + + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } + Value *getValue(Type *) const; + + // If possible, don't define operator+/operator- etc because these + // operators inevitably call FAddendCoef's constructor which is not cheap. + void operator=(const FAddendCoef &A); + void operator+=(const FAddendCoef &A); + void operator-=(const FAddendCoef &A); + void operator*=(const FAddendCoef &S); + + bool isOne() const { return isInt() && IntVal == 1; } + bool isTwo() const { return isInt() && IntVal == 2; } + bool isMinusOne() const { return isInt() && IntVal == -1; } + bool isMinusTwo() const { return isInt() && IntVal == -2; } + + private: + bool insaneIntVal(int V) { return V > 4 || V < -4; } + APFloat *getFpValPtr(void) + { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } + const APFloat *getFpValPtr(void) const + { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } + + const APFloat &getFpVal(void) const { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + + APFloat &getFpVal(void) + { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } + + bool isInt() const { return !IsFp; } + + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); + private: + + bool IsFp; + + // True iff FpValBuf contains an instance of APFloat. + bool BufHasFpVal; + + // The integer coefficient of an individual addend is either 1 or -1, + // and we try to simplify at most 4 addends from neighboring at most + // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt + // is overkill of this end. + short IntVal; + + AlignedCharArrayUnion<APFloat> FpValBuf; + }; + + /// FAddend is used to represent floating-point addend. An addend is + /// represented as <C, V>, where the V is a symbolic value, and C is a + /// constant coefficient. A constant addend is represented as <C, 0>. + /// + class FAddend { + public: + FAddend() { Val = 0; } + + Value *getSymVal (void) const { return Val; } + const FAddendCoef &getCoef(void) const { return Coeff; } + + bool isConstant() const { return Val == 0; } + bool isZero() const { return Coeff.isZero(); } + + void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } + void set(const APFloat& Coefficient, Value *V) + { Coeff.set(Coefficient); Val = V; } + void set(const ConstantFP* Coefficient, Value *V) + { Coeff.set(Coefficient->getValueAPF()); Val = V; } + + void negate() { Coeff.negate(); } + + /// Drill down the U-D chain one step to find the definition of V, and + /// try to break the definition into one or two addends. + static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); + + /// Similar to FAddend::drillDownOneStep() except that the value being + /// splitted is the addend itself. + unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; + + void operator+=(const FAddend &T) { + assert((Val == T.Val) && "Symbolic-values disagree"); + Coeff += T.Coeff; + } + + private: + void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } + + // This addend has the value of "Coeff * Val". + Value *Val; + FAddendCoef Coeff; + }; + + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along + /// with its neighboring at most two instructions. + /// + class FAddCombine { + public: + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + Value *simplify(Instruction *FAdd); + + private: + typedef SmallVector<const FAddend*, 4> AddendVect; + + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); + + Value *performFactorization(Instruction *I); + + /// Convert given addend to a Value + Value *createAddendVal(const FAddend &A, bool& NeedNeg); + + /// Return the number of instructions needed to emit the N-ary addition. + unsigned calcInstrNumber(const AddendVect& Vect); + Value *createFSub(Value *Opnd0, Value *Opnd1); + Value *createFAdd(Value *Opnd0, Value *Opnd1); + Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); + Value *createFNeg(Value *V); + Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); + void createInstPostProc(Instruction *NewInst); + + InstCombiner::BuilderTy *Builder; + Instruction *Instr; + + private: + // Debugging stuff are clustered here. + #ifndef NDEBUG + unsigned CreateInstrNum; + void initCreateInstNum() { CreateInstrNum = 0; } + void incCreateInstNum() { CreateInstrNum++; } + #else + void initCreateInstNum() {} + void incCreateInstNum() {} + #endif + }; +} + +//===----------------------------------------------------------------------===// +// +// Implementation of +// {FAddendCoef, FAddend, FAddition, FAddCombine}. +// +//===----------------------------------------------------------------------===// +FAddendCoef::~FAddendCoef() { + if (BufHasFpVal) + getFpValPtr()->~APFloat(); +} + +void FAddendCoef::set(const APFloat& C) { + APFloat *P = getFpValPtr(); + + if (isInt()) { + // As the buffer is meanless byte stream, we cannot call + // APFloat::operator=(). + new(P) APFloat(C); + } else + *P = C; + + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; +} + +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { + if (That.isInt()) + set(That.IntVal); + else + set(That.getFpVal()); +} + +void FAddendCoef::operator+=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal += That.IntVal; + else + getFpVal().add(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); +} + +void FAddendCoef::operator-=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal -= That.IntVal; + else + getFpVal().subtract(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); +} + +void FAddendCoef::operator*=(const FAddendCoef &That) { + if (That.isOne()) + return; + + if (That.isMinusOne()) { + negate(); + return; + } + + if (isInt() && That.isInt()) { + int Res = IntVal * (int)That.IntVal; + assert(!insaneIntVal(Res) && "Insane int value"); + IntVal = Res; + return; + } + + const fltSemantics &Semantic = + isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); + + if (isInt()) + convertToFpType(Semantic); + APFloat &F0 = getFpVal(); + + if (That.isInt()) + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); + else + F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); + + return; +} + +void FAddendCoef::negate() { + if (isInt()) + IntVal = 0 - IntVal; + else + getFpVal().changeSign(); +} + +Value *FAddendCoef::getValue(Type *Ty) const { + return isInt() ? + ConstantFP::get(Ty, float(IntVal)) : + ConstantFP::get(Ty->getContext(), getFpVal()); +} + +// The definition of <Val> Addends +// ========================================= +// A + B <1, A>, <1,B> +// A - B <1, A>, <1,B> +// 0 - B <-1, B> +// C * A, <C, A> +// A + C <1, A> <C, NULL> +// 0 +/- 0 <0, NULL> (corner case) +// +// Legend: A and B are not constant, C is constant +// +unsigned FAddend::drillValueDownOneStep + (Value *Val, FAddend &Addend0, FAddend &Addend1) { + Instruction *I = 0; + if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) + return 0; + + unsigned Opcode = I->getOpcode(); + + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { + ConstantFP *C0, *C1; + Value *Opnd0 = I->getOperand(0); + Value *Opnd1 = I->getOperand(1); + if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) + Opnd0 = 0; + + if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) + Opnd1 = 0; + + if (Opnd0) { + if (!C0) + Addend0.set(1, Opnd0); + else + Addend0.set(C0, 0); + } + + if (Opnd1) { + FAddend &Addend = Opnd0 ? Addend1 : Addend0; + if (!C1) + Addend.set(1, Opnd1); + else + Addend.set(C1, 0); + if (Opcode == Instruction::FSub) + Addend.negate(); + } + + if (Opnd0 || Opnd1) + return Opnd0 && Opnd1 ? 2 : 1; + + // Both operands are zero. Weird! + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + return 1; + } + + if (I->getOpcode() == Instruction::FMul) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { + Addend0.set(C, V1); + return 1; + } + + if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { + Addend0.set(C, V0); + return 1; + } + } + + return 0; +} + +// Try to break *this* addend into two addends. e.g. Suppose this addend is +// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, +// i.e. <2.3, X> and <2.3, Y>. +// +unsigned FAddend::drillAddendDownOneStep + (FAddend &Addend0, FAddend &Addend1) const { + if (isConstant()) + return 0; + + unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); + if (!BreakNum || Coeff.isOne()) + return BreakNum; + + Addend0.Scale(Coeff); + + if (BreakNum == 2) + Addend1.Scale(Coeff); + + return BreakNum; +} + +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + +Value *FAddCombine::simplify(Instruction *I) { + assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); + + // Currently we are not able to handle vector type. + if (I->getType()->isVectorTy()) + return 0; + + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + // Save the instruction before calling other member-functions. + Instr = I; + + FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; + + unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); + + // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. + unsigned Opnd0_ExpNum = 0; + unsigned Opnd1_ExpNum = 0; + + if (!Opnd0.isConstant()) + Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); + + // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. + if (OpndNum == 2 && !Opnd1.isConstant()) + Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); + + // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 + if (Opnd0_ExpNum && Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0_0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + // Compute instruction quota. We should save at least one instruction. + unsigned InstQuota = 0; + + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; + + if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) + return R; + } + + if (OpndNum != 2) { + // The input instruction is : "I=0.0 +/- V". If the "V" were able to be + // splitted into two addends, say "V = X - Y", the instruction would have + // been optimized into "I = Y - X" in the previous steps. + // + const FAddendCoef &CE = Opnd0.getCoef(); + return CE.isOne() ? Opnd0.getSymVal() : 0; + } + + // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] + if (Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] + if (Opnd0_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd1); + AllOpnds.push_back(&Opnd0_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 6: Try factorization as the last resort, + return performFactorization(I); +} + +Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { + + unsigned AddendNum = Addends.size(); + assert(AddendNum <= 4 && "Too many addends"); + + // For saving intermediate results; + unsigned NextTmpIdx = 0; + FAddend TmpResult[3]; + + // Points to the constant addend of the resulting simplified expression. + // If the resulting expr has constant-addend, this constant-addend is + // desirable to reside at the top of the resulting expression tree. Placing + // constant close to supper-expr(s) will potentially reveal some optimization + // opportunities in super-expr(s). + // + const FAddend *ConstAdd = 0; + + // Simplified addends are placed <SimpVect>. + AddendVect SimpVect; + + // The outer loop works on one symbolic-value at a time. Suppose the input + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // The symbolic-values will be processed in this order: x, y, z. + // + for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { + + const FAddend *ThisAddend = Addends[SymIdx]; + if (!ThisAddend) { + // This addend was processed before. + continue; + } + + Value *Val = ThisAddend->getSymVal(); + unsigned StartIdx = SimpVect.size(); + SimpVect.push_back(ThisAddend); + + // The inner loop collects addends sharing same symbolic-value, and these + // addends will be later on folded into a single addend. Following above + // example, if the symbolic value "y" is being processed, the inner loop + // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will + // be later on folded into "<b1+b2, y>". + // + for (unsigned SameSymIdx = SymIdx + 1; + SameSymIdx < AddendNum; SameSymIdx++) { + const FAddend *T = Addends[SameSymIdx]; + if (T && T->getSymVal() == Val) { + // Set null such that next iteration of the outer loop will not process + // this addend again. + Addends[SameSymIdx] = 0; + SimpVect.push_back(T); + } + } + + // If multiple addends share same symbolic value, fold them together. + if (StartIdx + 1 != SimpVect.size()) { + FAddend &R = TmpResult[NextTmpIdx ++]; + R = *SimpVect[StartIdx]; + for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) + R += *SimpVect[Idx]; + + // Pop all addends being folded and push the resulting folded addend. + SimpVect.resize(StartIdx); + if (Val != 0) { + if (!R.isZero()) { + SimpVect.push_back(&R); + } + } else { + // Don't push constant addend at this time. It will be the last element + // of <SimpVect>. + ConstAdd = &R; + } + } + } + + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + "out-of-bound access"); + + if (ConstAdd) + SimpVect.push_back(ConstAdd); + + Value *Result; + if (!SimpVect.empty()) + Result = createNaryFAdd(SimpVect, InstrQuota); + else { + // The addition is folded to 0.0. + Result = ConstantFP::get(Instr->getType(), 0.0); + } + + return Result; +} + +Value *FAddCombine::createNaryFAdd + (const AddendVect &Opnds, unsigned InstrQuota) { + assert(!Opnds.empty() && "Expect at least one addend"); + + // Step 1: Check if the # of instructions needed exceeds the quota. + // + unsigned InstrNeeded = calcInstrNumber(Opnds); + if (InstrNeeded > InstrQuota) + return 0; + + initCreateInstNum(); + + // step 2: Emit the N-ary addition. + // Note that at most three instructions are involved in Fadd-InstCombine: the + // addition in question, and at most two neighboring instructions. + // The resulting optimized addition should have at least one less instruction + // than the original addition expression tree. This implies that the resulting + // N-ary addition has at most two instructions, and we don't need to worry + // about tree-height when constructing the N-ary addition. + + Value *LastVal = 0; + bool LastValNeedNeg = false; + + // Iterate the addends, creating fadd/fsub using adjacent two addends. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + bool NeedNeg; + Value *V = createAddendVal(**I, NeedNeg); + if (!LastVal) { + LastVal = V; + LastValNeedNeg = NeedNeg; + continue; + } + + if (LastValNeedNeg == NeedNeg) { + LastVal = createFAdd(LastVal, V); + continue; + } + + if (LastValNeedNeg) + LastVal = createFSub(V, LastVal); + else + LastVal = createFSub(LastVal, V); + + LastValNeedNeg = false; + } + + if (LastValNeedNeg) { + LastVal = createFNeg(LastVal); + } + + #ifndef NDEBUG + assert(CreateInstrNum == InstrNeeded && + "Inconsistent in instruction numbers"); + #endif + + return LastVal; +} + +Value *FAddCombine::createFSub + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFSub(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFNeg(Value *V) { + Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); + return createFSub(Zero, V); +} + +Value *FAddCombine::createFAdd + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFMul(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +void FAddCombine::createInstPostProc(Instruction *NewInstr) { + NewInstr->setDebugLoc(Instr->getDebugLoc()); + + // Keep track of the number of instruction created. + incCreateInstNum(); + + // Propagate fast-math flags + NewInstr->setFastMathFlags(Instr->getFastMathFlags()); +} + +// Return the number of instruction needed to emit the N-ary addition. +// NOTE: Keep this function in sync with createAddendVal(). +unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { + unsigned OpndNum = Opnds.size(); + unsigned InstrNeeded = OpndNum - 1; + + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; + + // Adjust the number of instructions needed to emit the N-ary add. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + const FAddend *Opnd = *I; + if (Opnd->isConstant()) + continue; + + const FAddendCoef &CE = Opnd->getCoef(); + if (CE.isMinusOne() || CE.isMinusTwo()) + NegOpndNum++; + + // Let the addend be "c * x". If "c == +/-1", the value of the addend + // is immediately available; otherwise, it needs exactly one instruction + // to evaluate the value. + if (!CE.isMinusOne() && !CE.isOne()) + InstrNeeded++; + } + if (NegOpndNum == OpndNum) + InstrNeeded++; + return InstrNeeded; +} + +// Input Addend Value NeedNeg(output) +// ================================================================ +// Constant C C false +// <+/-1, V> V coefficient is -1 +// <2/-2, V> "fadd V, V" coefficient is -2 +// <C, V> "fmul V, C" false +// +// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. +Value *FAddCombine::createAddendVal + (const FAddend &Opnd, bool &NeedNeg) { + const FAddendCoef &Coeff = Opnd.getCoef(); + + if (Opnd.isConstant()) { + NeedNeg = false; + return Coeff.getValue(Instr->getType()); + } + + Value *OpndVal = Opnd.getSymVal(); + + if (Coeff.isMinusOne() || Coeff.isOne()) { + NeedNeg = Coeff.isMinusOne(); + return OpndVal; + } + + if (Coeff.isTwo() || Coeff.isMinusTwo()) { + NeedNeg = Coeff.isMinusTwo(); + return createFAdd(OpndVal, OpndVal); + } + + NeedNeg = false; + return createFMul(OpndVal, Coeff.getValue(Instr->getType())); +} + /// AddOne - Add one to a ConstantInt. static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } + /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { return ConstantInt::get(C->getContext(), C->getValue()-1); @@ -37,10 +861,10 @@ static Constant *SubOne(ConstantInt *C) { static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) return I->getOperand(0); @@ -64,22 +888,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. - - // Add has the property that adding any two 2's complement numbers can only + + // Add has the property that adding any two 2's complement numbers can only // have one carry bit which can change a sign. As such, if LHS and RHS each // have at least two sign bits, we know that the addition of the two values // will sign extend fine. if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; - - + + // If one of the operands only has one non-zero bit, and if the other operand // has a known-zero bit in a more significant place than it (not including the // sign bit) the ripple may go up to and fill the zero, but won't change the // sign. For example, (X & ~4) + 1. - + // TODO: Implement. - + return false; } @@ -100,7 +924,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const APInt &Val = CI->getValue(); if (Val.isSignBit()) return BinaryOperator::CreateXor(LHS, RHS); - + // See if SimplifyDemandedBits can simplify this. This handles stuff like // (X & 254)+1 -> (X&254)|1 if (SimplifyDemandedInstructionBits(I)) @@ -110,7 +934,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - + Value *XorLHS = 0; ConstantInt *XorRHS = 0; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); @@ -124,13 +948,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); @@ -175,7 +999,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } - + return BinaryOperator::CreateSub(RHS, LHSV); } @@ -209,7 +1033,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); - + // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateOr(LHS, RHS); @@ -251,7 +1075,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. const APInt &AddRHSV = CRHS->getValue(); - + // Form a mask of all bits from the lowest bit added through the top. APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); @@ -289,7 +1113,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); @@ -301,18 +1125,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { // (add (sext x), cst) --> (sext (add x, cst')) if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSExt(CI, I.getType()) == RHSC && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } - + // (add (sext x), (sext y)) --> (sext (add int x, y)) if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -323,7 +1147,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -351,18 +1175,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - // X + 0 --> X - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->isExactlyValue(ConstantFP::getNegativeZero - (I.getType())->getValueAPF())) - return ReplaceInstUsesWith(I, LHS); - } + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; // -A + B --> B - A // -A + -B --> -(A + B) @@ -374,11 +1192,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = dyn_castFNegVal(RHS)) return BinaryOperator::CreateFSub(LHS, V); - // Check for X+0.0. Simplify it to X if we know X is not -0.0. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) - if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) - return ReplaceInstUsesWith(I, LHS); - // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { @@ -388,7 +1201,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // requires a constant pool load, and generally allows the add to be better // instcombined. if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSIToFP(CI, I.getType()) == CFP && @@ -399,7 +1212,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return new SIToFPInst(NewAdd, I.getType()); } } - + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -410,13 +1223,18 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0),"addconv"); return new SIToFPInst(NewAdd, I.getType()); } } } - + + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + return Changed ? &I : 0; } @@ -428,7 +1246,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty) { assert(TD && "Must have target data info for this"); - + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; @@ -451,7 +1269,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // X - (gep X, ...) if (RHSGEP->getOperand(0) == LHS) { @@ -467,16 +1285,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. if (GEP1 == 0 || (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) return 0; - + // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); - + // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. if (GEP2) { @@ -517,7 +1335,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Replace (-1 - A) with (~A). if (match(Op0, m_AllOnes())) return BinaryOperator::CreateNot(Op1); - + if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { // C - ~X == X + (1+C) Value *X = 0; @@ -551,20 +1369,30 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + + // Fold (sub 0, (zext bool to B)) --> (sext bool to B) + if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateSExtOrBitCast(X, Op1->getType()); + + // Fold (sub 0, (sext bool to B)) --> (zext bool to B) + if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } - + { Value *Y; // X-(X+Y) == -Y X-(Y+X) == -Y if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateNeg(Y); - + // (X-Y)-X == -Y if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) return BinaryOperator::CreateNeg(Y); } - + if (Op1->hasOneUse()) { Value *X = 0, *Y = 0, *Z = 0; Constant *C = 0; @@ -581,7 +1409,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(Y, Y->getName() + ".not")); - + // 0 - (X sdiv C) -> (X sdiv -C) if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero())) @@ -604,14 +1432,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); return BinaryOperator::CreateMul(Op0, C); } - + // X - A*-B -> X + A*B // X - -A*B -> X + A*B Value *A, *B; if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); - + // X - A*CI -> X + A*-CI // X - CI*A -> X + A*-CI if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || @@ -630,7 +1458,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (X == dyn_castFoldableMul(Op1, C2)) return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } - + // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (TD) { @@ -639,23 +1467,31 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_PtrToInt(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); - + // trunc(p)-trunc(q) -> trunc(p-q) if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); } - + return 0; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + // If this is a 'B = x-(-A)', change to B = x+A... if (Value *V = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFAdd(Op0, V); + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 7d0af0d..990cbc3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -12,18 +12,18 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Transforms/Utils/CmpInstAnalysis.h" using namespace llvm; using namespace PatternMatch; /// AddOne - Add one to a ConstantInt. -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); } /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { @@ -36,15 +36,15 @@ static inline bool isFreeToInvert(Value *V) { // ~(~(X)) -> X. if (BinaryOperator::isNot(V)) return true; - + // Constants can be considered to be not'ed values. if (isa<ConstantInt>(V)) return true; - + // Compares can be inverted if they have a single use. if (CmpInst *CI = dyn_cast<CmpInst>(V)) return CI->hasOneUse(); - + return false; } @@ -56,7 +56,7 @@ static inline Value *dyn_castNotVal(Value *V) { if (!isFreeToInvert(Operand)) return Operand; } - + // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast<ConstantInt>(V)) return ConstantInt::get(C->getType(), ~C->getValue()); @@ -91,7 +91,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { } /// getNewICmpValue - This is the complement of getICmpCode, which turns an -/// opcode and two operands into either a constant true or false, or a brand +/// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, @@ -118,7 +118,7 @@ static Value *getFCmpValue(bool isordered, unsigned code, case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; - case 7: + case 7: if (!isordered) return ConstantInt::getTrue(LHS->getContext()); Pred = FCmpInst::FCMP_ORD; break; } @@ -154,7 +154,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Or->takeName(Op); return BinaryOperator::CreateAnd(Or, AndRHS); } - + ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together); if (TogetherCI && !TogetherCI->isZero()){ // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 @@ -166,7 +166,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return BinaryOperator::CreateOr(And, OpRHS); } } - + break; case Instruction::Add: if (Op->hasOneUse()) { @@ -215,7 +215,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, if (CI->getValue() == ShlMask) // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); // No need for the and. - + if (CI != AndRHS) { // Reducing bits set in and. TheAnd.setOperand(1, CI); return &TheAnd; @@ -236,7 +236,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, if (CI->getValue() == ShrMask) // Masking out bits that the shift already masks. return ReplaceInstUsesWith(TheAnd, Op); - + if (CI != AndRHS) { TheAnd.setOperand(1, CI); // Reduce bits set in and cst. return &TheAnd; @@ -269,22 +269,22 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is /// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient -/// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates +/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside) { - assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? + assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); - + if (Inside) { if (Lo == Hi) // Trivially false. return ConstantInt::getFalse(V->getContext()); // V >= Min && V < Hi --> V < Hi if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { - ICmpInst::Predicate pred = (isSigned ? + ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); return Builder->CreateICmp(pred, V, Hi); } @@ -302,7 +302,7 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, // V < Min || V >= Hi -> V > Hi-1 Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { - ICmpInst::Predicate pred = (isSigned ? + ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); return Builder->CreateICmp(pred, V, Hi); } @@ -327,14 +327,14 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) { // look for the first zero bit after the run of ones MB = BitWidth - ((V - 1) ^ V).countLeadingZeros(); // look for the first non-zero bit - ME = V.getActiveBits(); + ME = V.getActiveBits(); return true; } /// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask, /// where isSub determines whether the operator is a sub. If we can fold one of /// the following xforms: -/// +/// /// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask /// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 /// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0 @@ -355,8 +355,8 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, case Instruction::And: if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. - if ((Mask->getValue().countLeadingZeros() + - Mask->getValue().countPopulation()) == + if ((Mask->getValue().countLeadingZeros() + + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()) break; @@ -375,33 +375,33 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, case Instruction::Or: case Instruction::Xor: // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 - if ((Mask->getValue().countLeadingZeros() + + if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; return 0; } - + if (isSub) return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); } /// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C) -/// One of A and B is considered the mask, the other the value. This is -/// described as the "AMask" or "BMask" part of the enum. If the enum +/// One of A and B is considered the mask, the other the value. This is +/// described as the "AMask" or "BMask" part of the enum. If the enum /// contains only "Mask", then both A and B can be considered masks. /// If A is the mask, then it was proven, that (A & C) == C. This /// is trivial if C == A, or C == 0. If both A and C are constants, this /// proof is also easy. /// For the following explanations we assume that A is the mask. -/// The part "AllOnes" declares, that the comparison is true only +/// The part "AllOnes" declares, that the comparison is true only /// if (A & B) == A, or all bits of A are set in B. /// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes -/// The part "AllZeroes" declares, that the comparison is true only +/// The part "AllZeroes" declares, that the comparison is true only /// if (A & B) == 0, or all bits of A are cleared in B. /// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes -/// The part "Mixed" declares, that (A & B) == C and C might or might not +/// The part "Mixed" declares, that (A & B) == C and C might or might not /// contain any number of one bits and zero bits. /// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed /// The Part "Not" means, that in above descriptions "==" should be replaced @@ -425,16 +425,16 @@ enum MaskedICmpType { /// return the set of pattern classes (from MaskedICmpType) /// that (icmp SCC (A & B), C) satisfies -static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, +static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, ICmpInst::Predicate SCC) { ConstantInt *ACst = dyn_cast<ConstantInt>(A); ConstantInt *BCst = dyn_cast<ConstantInt>(B); ConstantInt *CCst = dyn_cast<ConstantInt>(C); bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); - bool icmp_abit = (ACst != 0 && !ACst->isZero() && + bool icmp_abit = (ACst != 0 && !ACst->isZero() && ACst->getValue().isPowerOf2()); - bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + bool icmp_bbit = (BCst != 0 && !BCst->isZero() && BCst->getValue().isPowerOf2()); unsigned result = 0; if (CCst != 0 && CCst->isZero()) { @@ -449,12 +449,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_BMask_NotMixed)); if (icmp_abit) result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes | - FoldMskICmp_AMask_NotMixed) + FoldMskICmp_AMask_NotMixed) : (FoldMskICmp_AMask_AllOnes | FoldMskICmp_AMask_Mixed)); if (icmp_bbit) result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes | - FoldMskICmp_BMask_NotMixed) + FoldMskICmp_BMask_NotMixed) : (FoldMskICmp_BMask_AllOnes | FoldMskICmp_BMask_Mixed)); return result; @@ -469,26 +469,23 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_AMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed)); - } - else if (ACst != 0 && CCst != 0 && - ConstantExpr::getAnd(ACst, CCst) == CCst) { + } else if (ACst != 0 && CCst != 0 && + ConstantExpr::getAnd(ACst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_AMask_Mixed : FoldMskICmp_AMask_NotMixed); } - if (B == C) - { + if (B == C) { result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes | FoldMskICmp_BMask_Mixed) : (FoldMskICmp_BMask_NotAllOnes | FoldMskICmp_BMask_NotMixed)); if (icmp_bbit) result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes | - FoldMskICmp_BMask_NotMixed) + FoldMskICmp_BMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_BMask_Mixed)); - } - else if (BCst != 0 && CCst != 0 && - ConstantExpr::getAnd(BCst, CCst) == CCst) { + } else if (BCst != 0 && CCst != 0 && + ConstantExpr::getAnd(BCst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_BMask_Mixed : FoldMskICmp_BMask_NotMixed); } @@ -531,7 +528,7 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred, /// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// return the set of pattern classes (from MaskedICmpType) /// that both LHS and RHS satisfy -static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, +static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value*& B, Value*& C, Value*& D, Value*& E, ICmpInst *LHS, ICmpInst *RHS, @@ -542,10 +539,10 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, if (LHS->getOperand(0)->getType()->isVectorTy()) return 0; // Here comes the tricky part: - // LHS might be of the form L11 & L12 == X, X == L21 & L22, + // LHS might be of the form L11 & L12 == X, X == L21 & L22, // and L11 & L12 == L21 & L22. The same goes for RHS. // Now we must find those components L** and R**, that are equal, so - // that we can extract the parameters A, B, C, D, and E for the canonical + // that we can extract the parameters A, B, C, D, and E for the canonical // above. Value *L1 = LHS->getOperand(0); Value *L2 = LHS->getOperand(1); @@ -610,14 +607,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, if (L11 == A) { B = L12; C = L2; - } - else if (L12 == A) { + } else if (L12 == A) { B = L11; C = L2; - } - else if (L21 == A) { + } else if (L21 == A) { B = L22; C = L1; - } - else if (L22 == A) { + } else if (L22 == A) { B = L21; C = L1; } @@ -643,32 +637,32 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, mask >>= 1; // treat "Not"-states as normal states if (mask & FoldMskICmp_Mask_AllZeroes) { - // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) + // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) Value* newOr = Builder->CreateOr(B, D); Value* newAnd = Builder->CreateAnd(A, newOr); // we can't use C as zero, because we might actually handle - // (icmp ne (A & B), B) & (icmp ne (A & D), D) + // (icmp ne (A & B), B) & (icmp ne (A & D), D) // with B and D, having a single bit set Value* zero = Constant::getNullValue(A->getType()); return Builder->CreateICmp(NEWCC, newAnd, zero); } - else if (mask & FoldMskICmp_BMask_AllOnes) { - // (icmp eq (A & B), B) & (icmp eq (A & D), D) + if (mask & FoldMskICmp_BMask_AllOnes) { + // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) Value* newOr = Builder->CreateOr(B, D); Value* newAnd = Builder->CreateAnd(A, newOr); return Builder->CreateICmp(NEWCC, newAnd, newOr); - } - else if (mask & FoldMskICmp_AMask_AllOnes) { - // (icmp eq (A & B), A) & (icmp eq (A & D), A) + } + if (mask & FoldMskICmp_AMask_AllOnes) { + // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) Value* newAnd1 = Builder->CreateAnd(B, D); Value* newAnd = Builder->CreateAnd(A, newAnd1); return Builder->CreateICmp(NEWCC, newAnd, A); } - else if (mask & FoldMskICmp_BMask_Mixed) { - // (icmp eq (A & B), C) & (icmp eq (A & D), E) + if (mask & FoldMskICmp_BMask_Mixed) { + // (icmp eq (A & B), C) & (icmp eq (A & D), E) // We already know that B & C == C && D & E == E. // If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of // C and E, which are shared by both the mask B and the mask D, don't @@ -680,7 +674,7 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *DCst = dyn_cast<ConstantInt>(D); if (DCst == 0) return 0; // we can't simply use C and E, because we might actually handle - // (icmp ne (A & B), B) & (icmp eq (A & D), D) + // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set ConstantInt *CCst = dyn_cast<ConstantInt>(C); @@ -727,13 +721,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // handle (roughly): (icmp eq (A & B), C) & (icmp eq (A & D), E) if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, ICmpInst::ICMP_EQ, Builder)) return V; - + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1)); ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1)); if (LHSCst == 0 || RHSCst == 0) return 0; - + if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) // where C is a power of 2 @@ -742,7 +736,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } - + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); @@ -759,14 +753,13 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; // (trunc x) == C1 & (and x, CA) == C2 + // (and x, CA) == C2 & (trunc x) == C1 if (match(Val2, m_Trunc(m_Value(V))) && match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { SmallCst = RHSCst; BigCst = LHSCst; - } - // (and x, CA) == C2 & (trunc x) == C1 - else if (match(Val, m_Trunc(m_Value(V))) && - match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + } else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { SmallCst = LHSCst; BigCst = RHSCst; } @@ -789,7 +782,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; - + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || @@ -799,9 +792,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. - ConstantRange LHSRange = + ConstantRange LHSRange = ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); - ConstantRange RHSRange = + ConstantRange RHSRange = ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); if (LHSRange.intersectWith(RHSRange).isEmptySet()) @@ -810,16 +803,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; - + // Ensure that the larger constant is on the RHS. bool ShouldSwap; if (CmpInst::isSigned(LHSCC) || - (ICmpInst::isEquality(LHSCC) && + (ICmpInst::isEquality(LHSCC) && CmpInst::isSigned(RHSCC))) ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); else ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); - + if (ShouldSwap) { std::swap(LHS, RHS); std::swap(LHSCst, RHSCst); @@ -829,8 +822,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // At this point, we know we have two icmp instructions // comparing a value against two constants and and'ing the result // together. Because of the above check, we know that we only have - // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know - // (from the icmp folding check above), that the two constants + // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know + // (from the icmp folding check above), that the two constants // are not equal and that the larger constant is on the RHS assert(LHSCst != RHSCst && "Compares not folded above?"); @@ -932,7 +925,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } break; } - + return 0; } @@ -951,7 +944,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return ConstantInt::getFalse(LHS->getContext()); return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } - + // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && @@ -959,18 +952,18 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return 0; } - + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - - + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { // Swap RHS operands to match LHS. Op1CC = FCmpInst::getSwappedPredicate(Op1CC); std::swap(Op1LHS, Op1RHS); } - + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). if (Op0CC == Op1CC) @@ -981,7 +974,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return RHS; if (Op1CC == FCmpInst::FCMP_TRUE) return LHS; - + bool Op0Ordered; bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); @@ -1001,7 +994,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return LHS; if (Op0Ordered && (Op0Ordered == Op1Ordered)) return RHS; - + // uno && oeq -> uno && (ord && eq) -> false if (!Op0Ordered) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); @@ -1025,10 +1018,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return ReplaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) - return &I; + return &I; if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { const APInt &AndRHSMask = AndRHS->getValue(); @@ -1043,7 +1036,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { case Instruction::Or: { // If the mask is only needed on one incoming arm, push it up. if (!Op0I->hasOneUse()) break; - + APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { // Not masking anything out for the LHS, move to RHS. @@ -1103,12 +1096,12 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } break; } - + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I)) return Res; } - + // If this is an integer truncation, and if the source is an 'and' with // immediate, transform it. This frequently occurs for bitfield accesses. { @@ -1116,7 +1109,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { // Change: and (trunc (and X, YC) to T), C2 // into : and (trunc X to T), trunc(YC) & C2 - // This will fold the two constants together, which may allow + // This will fold the two constants together, which may allow // other simplifications. Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); @@ -1143,7 +1136,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { I.getName()+".demorgan"); return BinaryOperator::CreateNot(Or); } - + { Value *A = 0, *B = 0, *C = 0, *D = 0; // (A|B) & ~(A&B) -> A^B @@ -1151,13 +1144,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - + // ~(A&B) & (A|B) -> A^B if (match(Op1, m_Or(m_Value(A), m_Value(B))) && match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && ((A == C && B == D) || (A == D && B == C))) return BinaryOperator::CreateXor(A, B); - + // A&(A^B) => A & ~B { Value *tmpOp0 = Op0; @@ -1193,19 +1186,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) return BinaryOperator::CreateAnd(A, Op0); } - + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) if (Value *Res = FoldAndOfICmps(LHS, RHS)) return ReplaceInstUsesWith(I, Res); - + // If and'ing two fcmp, try combine them into one. if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) if (Value *Res = FoldAndOfFCmps(LHS, RHS)) return ReplaceInstUsesWith(I, Res); - - + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { @@ -1214,21 +1207,21 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntOrIntVectorTy()) { Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); - + // Only do this if the casts both really cause code to be generated. if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } - + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) if (Value *Res = FoldAndOfICmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) @@ -1237,21 +1230,49 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } } - + // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { Value *NewOp = Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, + return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } } + { + Value *X = 0; + bool OpsSwapped = false; + // Canonicalize SExt or Not to the LHS + if (match(Op1, m_SExt(m_Value())) || + match(Op1, m_Not(m_Value()))) { + std::swap(Op0, Op1); + OpsSwapped = true; + } + + // Fold (and (sext bool to A), B) --> (select bool, B, 0) + if (match(Op0, m_SExt(m_Value(X))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op1->getType()); + return SelectInst::Create(X, Op1, Zero); + } + + // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) + if (match(Op0, m_Not(m_SExt(m_Value(X)))) && + X->getType()->getScalarType()->isIntegerTy(1)) { + Value *Zero = Constant::getNullValue(Op0->getType()); + return SelectInst::Create(X, Zero, Op1); + } + + if (OpsSwapped) + std::swap(Op0, Op1); + } + return Changed ? &I : 0; } @@ -1288,11 +1309,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask, ByteValues); } - + // If this is a logical shift by a constant multiple of 8, recurse with // OverallLeftShift and ByteMask adjusted. if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { - unsigned ShAmt = + unsigned ShAmt = cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); // Ensure the shift amount is defined and of a byte value. if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size())) @@ -1313,7 +1334,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, if (OverallLeftShift >= (int)ByteValues.size()) return true; if (OverallLeftShift <= -(int)ByteValues.size()) return true; - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, ByteValues); } @@ -1325,20 +1346,20 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, unsigned NumBytes = ByteValues.size(); APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255); const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - + for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) { // If this byte is masked out by a later operation, we don't care what // the and mask is. if ((ByteMask & (1 << i)) == 0) continue; - + // If the AndMask is all zeros for this byte, clear the bit. APInt MaskB = AndMask & Byte; if (MaskB == 0) { ByteMask &= ~(1U << i); continue; } - + // If the AndMask is not all ones for this byte, it's not a bytezap. if (MaskB != Byte) return true; @@ -1346,11 +1367,11 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, // Otherwise, this byte is kept. } - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, + return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, ByteValues); } } - + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be // the input value to the bswap. Some observations: 1) if more than one byte // is demanded from this input, then it could not be successfully assembled @@ -1358,7 +1379,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, // their ultimate destination. if (!isPowerOf2_32(ByteMask)) return true; unsigned InputByteNo = CountTrailingZeros_32(ByteMask); - + // 2) The input and ultimate destinations must line up: if byte 3 of an i32 // is demanded, it needs to go into byte 0 of the result. This means that the // byte needs to be shifted until it lands in the right byte bucket. The @@ -1368,7 +1389,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, unsigned DestByteNo = InputByteNo + OverallLeftShift; if (ByteValues.size()-1-DestByteNo != InputByteNo) return true; - + // If the destination byte value is already defined, the values are or'd // together, which isn't a bswap (unless it's an or of the same bits). if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) @@ -1381,25 +1402,25 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); - if (!ITy || ITy->getBitWidth() % 16 || + if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. - ITy->getBitWidth() > 32*8) + ITy->getBitWidth() > 32*8) return 0; // Can only bswap pairs of bytes. Can't do vectors. - + /// ByteValues - For each byte of the result, we keep track of which value /// defines each byte. SmallVector<Value*, 8> ByteValues; ByteValues.resize(ITy->getBitWidth()/8); - + // Try to find all the pieces corresponding to the bswap. uint32_t ByteMask = ~0U >> (32-ByteValues.size()); if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) return 0; - + // Check to see if all of the bytes come from the same value. Value *V = ByteValues[0]; if (V == 0) return 0; // Didn't find a byte? Must be zero. - + // Check to make sure that all of the bytes come from the same value. for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) @@ -1425,7 +1446,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return SelectInst::Create(Cond, C, B); if (match(D, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, B); - + // ((cond?-1:0)&C) | ((cond?0:-1)&D) -> cond ? C : D. if (match(B, m_Not(m_SExt(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); @@ -1483,33 +1504,33 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. if (Val != Val2) return 0; - + // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + // We can't fold (ugt x, C) | (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; - + // Ensure that the larger constant is on the RHS. bool ShouldSwap; if (CmpInst::isSigned(LHSCC) || - (ICmpInst::isEquality(LHSCC) && + (ICmpInst::isEquality(LHSCC) && CmpInst::isSigned(RHSCC))) ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue()); else ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue()); - + if (ShouldSwap) { std::swap(LHS, RHS); std::swap(LHSCst, RHSCst); std::swap(LHSCC, RHSCC); } - + // At this point, we know we have two icmp instructions // comparing a value against two constants and or'ing the result // together. Because of the above check, we know that we only have @@ -1531,6 +1552,20 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return Builder->CreateICmpULT(Add, AddCST); } + + if (LHS->getOperand(0) == RHS->getOperand(0)) { + // if LHSCst and RHSCst differ only by one bit: + // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1 + assert(LHSCst->getValue().ule(LHSCst->getValue())); + + APInt Xor = LHSCst->getValue() ^ RHSCst->getValue(); + if (Xor.isPowerOf2()) { + Value *NegCst = Builder->getInt(~Xor); + Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst); + return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst); + } + } + break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change @@ -1632,7 +1667,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { /// function. Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { @@ -1640,25 +1675,25 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) return ConstantInt::getTrue(LHS->getContext()); - + // Otherwise, no need to compare the two constants, compare the // rest. return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } - + // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); - + return 0; } - + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); - + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { // Swap RHS operands to match LHS. Op1CC = FCmpInst::getSwappedPredicate(Op1CC); @@ -1692,7 +1727,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// ((A | B) & C1) | (B & C2) /// /// into: -/// +/// /// (A & C1) | B /// /// when the XOR of the two constants is "all ones" (-1). @@ -1727,7 +1762,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return ReplaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -1741,7 +1776,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Op0->hasOneUse()) { Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); - return BinaryOperator::CreateAnd(Or, + return BinaryOperator::CreateAnd(Or, ConstantInt::get(I.getContext(), RHS->getValue() | C1->getValue())); } @@ -1778,7 +1813,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Instruction *BSwap = MatchBSwap(I)) return BSwap; } - + // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && @@ -1827,7 +1862,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return ReplaceInstUsesWith(I, B); } } - + if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 @@ -1844,7 +1879,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateAnd(B, ConstantInt::get(B->getContext(), C1->getValue()|C2->getValue())); - + // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. ConstantInt *C3 = 0, *C4 = 0; @@ -1904,16 +1939,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Ret) return Ret; } } - + // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { if (BinaryOperator *SI0 = dyn_cast<BinaryOperator>(Op0)) - if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && + if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), SI0->getName()); - return BinaryOperator::Create(SI1->getOpcode(), NewOp, + return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } } @@ -1975,13 +2010,13 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) return ReplaceInstUsesWith(I, Res); - + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) if (Value *Res = FoldOrOfFCmps(LHS, RHS)) return ReplaceInstUsesWith(I, Res); - + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { CastInst *Op1C = dyn_cast<CastInst>(Op1); @@ -1999,14 +2034,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } - + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) if (Value *Res = FoldOrOfICmps(LHS, RHS)) return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); - + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the // cast is otherwise not optimizable. This happens for vector sexts. if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) @@ -2035,7 +2070,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Inner->takeName(Op0); return BinaryOperator::CreateOr(Inner, C1); } - + + // Change (or (bool?A:B),(bool?C:D)) --> (bool?(or A,C):(or B,D)) + // Since this OR statement hasn't been optimized further yet, we hope + // that this transformation will allow the new ORs to be optimized. + { + Value *X = 0, *Y = 0; + if (Op0->hasOneUse() && Op1->hasOneUse() && + match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && + match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { + Value *orTrue = Builder->CreateOr(A, C); + Value *orFalse = Builder->CreateOr(B, D); + return SelectInst::Create(X, orTrue, orFalse); + } + } + return Changed ? &I : 0; } @@ -2050,7 +2099,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return ReplaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) return &I; @@ -2058,7 +2107,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // Is this a ~ operation? if (Value *NotOp = dyn_castNotVal(&I)) { if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || + if (Op0I->getOpcode() == Instruction::And || Op0I->getOpcode() == Instruction::Or) { // ~(~X & Y) --> (X | ~Y) - De Morgan's Law // ~(~X | Y) === (X & ~Y) - De Morgan's Law @@ -2072,10 +2121,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return BinaryOperator::CreateOr(Op0NotVal, NotY); return BinaryOperator::CreateAnd(Op0NotVal, NotY); } - + // ~(X & Y) --> (~X | ~Y) - De Morgan's Law // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (isFreeToInvert(Op0I->getOperand(0)) && + if (isFreeToInvert(Op0I->getOperand(0)) && isFreeToInvert(Op0I->getOperand(1))) { Value *NotX = Builder->CreateNot(Op0I->getOperand(0), "notlhs"); @@ -2093,8 +2142,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } } - - + + if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { if (RHS->isOne() && Op0->hasOneUse()) // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B @@ -2109,7 +2158,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHS == ConstantExpr::getCast(Opcode, + (RHS == ConstantExpr::getCast(Opcode, ConstantInt::getTrue(I.getContext()), Op0C->getDestTy()))) { CI->setPredicate(CI->getInversePredicate()); @@ -2128,7 +2177,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { ConstantInt::get(I.getType(), 1)); return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); } - + if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X @@ -2152,13 +2201,34 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); - NewRHS = ConstantExpr::getAnd(NewRHS, + NewRHS = ConstantExpr::getAnd(NewRHS, ConstantExpr::getNot(CommonBits)); Worklist.Add(Op0I); I.setOperand(0, Op0I->getOperand(0)); I.setOperand(1, NewRHS); return &I; } + } else if (Op0I->getOpcode() == Instruction::LShr) { + // ((X^C1) >> C2) ^ C3 -> (X>>C2) ^ ((C1>>C2)^C3) + // E1 = "X ^ C1" + BinaryOperator *E1; + ConstantInt *C1; + if (Op0I->hasOneUse() && + (E1 = dyn_cast<BinaryOperator>(Op0I->getOperand(0))) && + E1->getOpcode() == Instruction::Xor && + (C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) { + // fold (C1 >> C2) ^ C3 + ConstantInt *C2 = Op0CI, *C3 = RHS; + APInt FoldConst = C1->getValue().lshr(C2->getValue()); + FoldConst ^= C3->getValue(); + // Prepare the two operands. + Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Opnd0->takeName(Op0I); + cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc()); + Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); + + return BinaryOperator::CreateXor(Opnd0, FoldVal); + } } } } @@ -2184,7 +2254,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { I.swapOperands(); // Simplified below. std::swap(Op0, Op1); } - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) Op1I->swapOperands(); @@ -2196,7 +2266,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } } - + BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0); if (Op0I) { Value *A, *B; @@ -2206,7 +2276,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); @@ -2216,31 +2286,31 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } } - + // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts. - if (Op0I && Op1I && Op0I->isShift() && - Op0I->getOpcode() == Op1I->getOpcode() && + if (Op0I && Op1I && Op0I->isShift() && + Op0I->getOpcode() == Op1I->getOpcode() && Op0I->getOperand(1) == Op1I->getOperand(1) && (Op0I->hasOneUse() || Op1I->hasOneUse())) { Value *NewOp = Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), Op0I->getName()); - return BinaryOperator::Create(Op1I->getOpcode(), NewOp, + return BinaryOperator::Create(Op1I->getOpcode(), NewOp, Op1I->getOperand(1)); } - + if (Op0I && Op1I) { Value *A, *B, *C, *D; // (A & B)^(A | B) -> A ^ B if (match(Op0I, m_And(m_Value(A), m_Value(B))) && match(Op1I, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) + if ((A == C && B == D) || (A == D && B == C)) return BinaryOperator::CreateXor(A, B); } // (A | B)^(A & B) -> A ^ B if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && match(Op1I, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) + if ((A == C && B == D) || (A == D && B == C)) return BinaryOperator::CreateXor(A, B); } } @@ -2257,7 +2327,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - return ReplaceInstUsesWith(I, + return ReplaceInstUsesWith(I, getNewICmpValue(isSigned, Code, Op0, Op1, Builder)); } @@ -2270,9 +2340,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Type *SrcTy = Op0C->getOperand(0)->getType(); if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. - ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), I.getType()) && - ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 48f2704..64cd1bd 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -12,12 +12,17 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Support/CallSite.h" -#include "llvm/DataLayout.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/PatternMatch.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +using namespace PatternMatch; + +STATISTIC(NumSimplified, "Number of library calls simplified"); /// getPromotedType - Return the specified type promoted as it would be to pass /// though a va_arg area. @@ -273,25 +278,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); return 0; } - case Intrinsic::bswap: + case Intrinsic::bswap: { + Value *IIOperand = II->getArgOperand(0); + Value *X = 0; + // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); + if (match(IIOperand, m_BSwap(m_Value(X)))) + return ReplaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) - if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) { - if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0))) - if (Operand->getIntrinsicID() == Intrinsic::bswap) { - unsigned C = Operand->getType()->getPrimitiveSizeInBits() - - TI->getType()->getPrimitiveSizeInBits(); - Value *CV = ConstantInt::get(Operand->getType(), C); - Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV); - return new TruncInst(V, TI->getType()); - } + if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { + unsigned C = X->getType()->getPrimitiveSizeInBits() - + IIOperand->getType()->getPrimitiveSizeInBits(); + Value *CV = ConstantInt::get(X->getType(), C); + Value *V = Builder->CreateLShr(X, CV); + return new TruncInst(V, IIOperand->getType()); } - break; + } + case Intrinsic::powi: if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 @@ -690,7 +695,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Splat->isOne()) { if (Zext) return CastInst::CreateZExtOrBitCast(Arg0, II->getType()); - // else + // else return CastInst::CreateSExtOrBitCast(Arg0, II->getType()); } } @@ -785,8 +790,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) { if (CI->getCalledFunction() == 0) return 0; - if (Value *With = Simplifier->optimizeCall(CI)) - return ReplaceInstUsesWith(*CI, With); + if (Value *With = Simplifier->optimizeCall(CI)) { + ++NumSimplified; + return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With); + } return 0; } @@ -894,7 +901,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { new StoreInst(ConstantInt::getTrue(Callee->getContext()), UndefValue::get(Type::getInt1PtrTy(Callee->getContext())), OldCall); - // If OldCall dues not return void then replaceAllUsesWith undef. + // If OldCall does not return void then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!OldCall->getType()->isVoidTy()) ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType())); @@ -977,7 +984,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee == 0) return false; Instruction *Caller = CS.getInstruction(); - const AttrListPtr &CallerPAL = CS.getAttributes(); + const AttributeSet &CallerPAL = CS.getAttributes(); // Okay, this is a cast from a function to a different type. Unless doing so // would cause a type conversion of one of our arguments, change this call to @@ -1007,8 +1014,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { - AttrBuilder RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs.hasAttributes(Attributes::typeIncompatible(NewRetTy))) + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); + if (RAttrs. + hasAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex)) return false; // Attribute not compatible with transformed value. } @@ -1037,14 +1047,16 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isCastable(ActTy, ParamTy)) return false; // Cannot transform this parameter value. - Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (AttrBuilder(Attrs). - hasAttributes(Attributes::typeIncompatible(ParamTy))) + if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1). + hasAttributes(AttributeFuncs:: + typeIncompatible(ParamTy, i + 1), i + 1)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. - if (ParamTy != ActTy && Attrs.hasAttribute(Attributes::ByVal)) { + if (ParamTy != ActTy && + CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, + Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy); if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0) return false; @@ -1093,10 +1105,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - if (CallerPAL.getSlot(i - 1).Index <= FT->getNumParams()) + unsigned Index = CallerPAL.getSlotIndex(i - 1); + if (Index <= FT->getNumParams()) break; - Attributes PAttrs = CallerPAL.getSlot(i - 1).Attrs; - if (PAttrs.hasIncompatibleWithVarArgsAttrs()) + + // Check if it has an attribute that's incompatible with varargs. + AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1); + if (PAttrs.hasAttribute(Index, Attribute::StructRet)) return false; } @@ -1105,21 +1120,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // inserting cast instructions as necessary. std::vector<Value*> Args; Args.reserve(NumActualArgs); - SmallVector<AttributeWithIndex, 8> attrVec; + SmallVector<AttributeSet, 8> attrVec; attrVec.reserve(NumCommonArgs); // Get any return attributes. - AttrBuilder RAttrs = CallerPAL.getRetAttributes(); + AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex); // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs.removeAttributes(Attributes::typeIncompatible(NewRetTy)); + RAttrs. + removeAttributes(AttributeFuncs:: + typeIncompatible(NewRetTy, AttributeSet::ReturnIndex), + AttributeSet::ReturnIndex); // Add the new return attributes. if (RAttrs.hasAttributes()) - attrVec.push_back( - AttributeWithIndex::get(AttrListPtr::ReturnIndex, - Attributes::get(FT->getContext(), RAttrs))); + attrVec.push_back(AttributeSet::get(Caller->getContext(), + AttributeSet::ReturnIndex, RAttrs)); AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { @@ -1133,9 +1150,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1, + PAttrs)); } // If the function takes more arguments than the call was taking, add them @@ -1145,10 +1163,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If we are removing arguments to the function, emit an obnoxious warning. if (FT->getNumParams() < NumActualArgs) { - if (!FT->isVarArg()) { - errs() << "WARNING: While resolving call to function '" - << Callee->getName() << "' arguments were dropped!\n"; - } else { + // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722 + if (FT->isVarArg()) { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { Type *PTy = getPromotedType((*AI)->getType()); @@ -1162,23 +1178,23 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // Add any parameter attributes. - Attributes PAttrs = CallerPAL.getParamAttributes(i + 1); + AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1); if (PAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs)); + attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1, + PAttrs)); } } } - Attributes FnAttrs = CallerPAL.getFnAttributes(); - if (FnAttrs.hasAttributes()) - attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, - FnAttrs)); + AttributeSet FnAttrs = CallerPAL.getFnAttributes(); + if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex)) + attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs)); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(), - attrVec); + const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), + attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -1238,13 +1254,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, Value *Callee = CS.getCalledValue(); PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - const AttrListPtr &Attrs = CS.getAttributes(); + const AttributeSet &Attrs = CS.getAttributes(); // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. - for (unsigned I = 0, E = Attrs.getNumAttrs(); I != E; ++I) - if (Attrs.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) - return 0; + if (Attrs.hasAttrSomewhere(Attribute::Nest)) + return 0; assert(Tramp && "transformCallThroughTrampoline called with incorrect CallSite."); @@ -1253,16 +1268,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, PointerType *NestFPTy = cast<PointerType>(NestF->getType()); FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType()); - const AttrListPtr &NestAttrs = NestF->getAttributes(); + const AttributeSet &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; Type *NestTy = 0; - Attributes NestAttr; + AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. for (FunctionType::param_iterator I = NestFTy->param_begin(), E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.getParamAttributes(NestIdx).hasAttribute(Attributes::Nest)){ + if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) { // Record the parameter type and any other attributes. NestTy = *I; NestAttr = NestAttrs.getParamAttributes(NestIdx); @@ -1274,17 +1289,16 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, std::vector<Value*> NewArgs; NewArgs.reserve(unsigned(CS.arg_end()-CS.arg_begin())+1); - SmallVector<AttributeWithIndex, 8> NewAttrs; + SmallVector<AttributeSet, 8> NewAttrs; NewAttrs.reserve(Attrs.getNumSlots() + 1); // Insert the nest argument into the call argument list, which may // mean appending it. Likewise for attributes. // Add any result attributes. - Attributes Attr = Attrs.getRetAttributes(); - if (Attr.hasAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex, - Attr)); + if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Attrs.getRetAttributes())); { unsigned Idx = 1; @@ -1296,7 +1310,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, if (NestVal->getType() != NestTy) NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); - NewAttrs.push_back(AttributeWithIndex::get(NestIdx, NestAttr)); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + NestAttr)); } if (I == E) @@ -1304,20 +1319,21 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - Attr = Attrs.getParamAttributes(Idx); - if (Attr.hasAttributes()) - NewAttrs.push_back - (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr)); + AttributeSet Attr = Attrs.getParamAttributes(Idx); + if (Attr.hasAttributes(Idx)) { + AttrBuilder B(Attr, Idx); + NewAttrs.push_back(AttributeSet::get(Caller->getContext(), + Idx + (Idx >= NestIdx), B)); + } ++Idx, ++I; } while (1); } // Add any function attributes. - Attr = Attrs.getFnAttributes(); - if (Attr.hasAttributes()) - NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex, - Attr)); + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + NewAttrs.push_back(AttributeSet::get(FTy->getContext(), + Attrs.getFnAttributes())); // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy @@ -1356,7 +1372,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs); + const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index bb59db8..2ee1278 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -13,9 +13,9 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; @@ -30,7 +30,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Scale = 0; return ConstantInt::get(Val->getType(), 0); } - + if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { // Cannot look past anything that might overflow. OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); @@ -47,19 +47,19 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Mul) { // This value is scaled by 'RHS'. Scale = RHS->getZExtValue(); Offset = 0; return I->getOperand(0); } - + if (I->getOpcode() == Instruction::Add) { - // We have X+C. Check to see if we really have (X*C2)+C1, + // We have X+C. Check to see if we really have (X*C2)+C1, // where C1 is divisible by C2. unsigned SubScale; - Value *SubVal = + Value *SubVal = DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; @@ -82,7 +82,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (!TD) return 0; PointerType *PTy = cast<PointerType>(CI.getType()); - + BuilderTy AllocaBuilder(*Builder); AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); @@ -104,13 +104,19 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. unsigned ArraySizeScale; uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); - + // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || @@ -125,17 +131,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Insert before the alloca, not before the cast. Amt = AllocaBuilder.CreateMul(Amt, NumElements); } - + if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { Value *Off = ConstantInt::get(AI.getArraySize()->getType(), Offset, true); Amt = AllocaBuilder.CreateAdd(Amt, Off); } - + AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); - + // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. @@ -148,10 +154,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that +/// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. -Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, +Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast<Constant>(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); @@ -181,7 +187,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; - } + } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: @@ -190,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // new. if (I->getOperand(0)->getType() == Ty) return I->getOperand(0); - + // Otherwise, must be the same type of cast, so just reinsert a new one. // This also handles the case of zext(trunc(x)) -> zext(x). Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty, @@ -212,11 +218,11 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, Res = NPN; break; } - default: + default: // TODO: Can handle more cases here. llvm_unreachable("Unreachable!"); } - + Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -224,7 +230,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, /// This function is a wrapper around CastInst::isEliminableCastPair. It /// simply extracts arguments and returns what that function returns. -static Instruction::CastOps +static Instruction::CastOps isEliminableCastPair( const CastInst *CI, ///< The first cast instruction unsigned opcode, ///< The opcode of the second cast instruction @@ -253,7 +259,7 @@ isEliminableCastPair( if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) || (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy)) Res = 0; - + return Instruction::CastOps(Res); } @@ -265,18 +271,18 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - + // If this is another cast that can be eliminated, we prefer to have it // eliminated. if (const CastInst *CI = dyn_cast<CastInst>(V)) if (isEliminableCastPair(CI, opc, Ty, TD)) return false; - + // If this is a vector sext from a compare, then we don't want to break the // idiom where each element of the extended vector is either zero or all ones. if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) return false; - + return true; } @@ -288,7 +294,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { // Many cases of "cast of a cast" are eliminable. If it's eliminable we just // eliminate it now. if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast - if (Instruction::CastOps opc = + if (Instruction::CastOps opc = isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), TD)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. @@ -311,7 +317,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; } - + return 0; } @@ -330,15 +336,15 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // We can always evaluate constants in another type. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + Type *OrigTy = V->getType(); - + // If this is an extension from the dest type, we can eliminate it, even if it // has multiple uses. - if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && + if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) && I->getOperand(0)->getType() == Ty) return true; @@ -423,29 +429,29 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(), *SrcTy = Src->getType(); - + // Attempt to truncate the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { - + // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -462,7 +468,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } - + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = 0; ConstantInt *Cst = 0; if (Src->hasOneUse() && @@ -472,7 +478,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // ASize < MidSize and MidSize > ResultSize, but don't know the relation // between ASize and ResultSize. unsigned ASize = A->getType()->getPrimitiveSizeInBits(); - + // If the shift amount is larger than the size of A, then the result is // known to be zero because all the input bits got shifted out. if (Cst->getZExtValue() >= ASize) @@ -485,7 +491,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), false); } - + // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. if (Src->hasOneUse() && isa<IntegerType>(Src->getType()) && @@ -508,7 +514,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // cast to integer to avoid the comparison. if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { const APInt &Op1CV = Op1C->getValue(); - + // zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || @@ -538,14 +544,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV == 0 || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); - + APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoXform) return ICI; @@ -559,7 +565,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } - + uint32_t ShiftAmt = KnownZeroMask.logBase2(); Value *In = ICI->getOperand(0); if (ShiftAmt) { @@ -568,12 +574,12 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), In->getName()+".lobit"); } - + if ((Op1CV != 0) == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } - + if (CI.getType() == In->getType()) return ReplaceInstUsesWith(CI, In); return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/); @@ -646,19 +652,19 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { BitsToClear = 0; if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If the input is a truncate from the destination type, we can trivially // eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + unsigned Opc = I->getOpcode(), Tmp; switch (Opc) { case Instruction::ZExt: // zext(zext(x)) -> zext(x). @@ -678,7 +684,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) return true; - + // If the operation is an AND/OR/XOR and the bits to clear are zero in the // other side, BitsToClear is ok. if (Tmp == 0 && @@ -691,10 +697,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { APInt::getHighBitsSet(VSize, BitsToClear))) return true; } - + // Otherwise, we don't know how to analyze this BitsToClear case yet. return false; - + case Instruction::LShr: // We can promote lshr(x, cst) if we can promote x. This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. @@ -716,7 +722,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { Tmp != BitsToClear) return false; return true; - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -739,48 +745,48 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { } Instruction *InstCombiner::visitZExt(ZExtInst &CI) { - // If this zero extend is only used by a truncate, let the truncate by + // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) return Result; - // See if we can simplify any instructions used by the input whose sole + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); - + // Attempt to extend the entire input expression tree to the destination // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear)) { + CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); - + // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); - + uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // If the high bits are already filled with zeros, just replace this // cast with the result. if (MaskedValueIsZero(Res, APInt::getHighBitsSet(DestBitSize, DestBitSize-SrcBitsKept))) return ReplaceInstUsesWith(CI, Res); - + // We need to emit an AND to clear the high bits. Constant *C = ConstantInt::get(Res->getType(), APInt::getLowBitsSet(DestBitSize, SrcBitsKept)); @@ -792,7 +798,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // 'and' which will be much cheaper than the pair of casts. if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast // TODO: Subsume this into EvaluateInDifferentType. - + // Get the sizes of the types involved. We know that the intermediate type // will be smaller than A or C, but don't know the relation between A and C. Value *A = CSrc->getOperand(0); @@ -809,7 +815,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); } - + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), @@ -818,7 +824,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize > DstSize) { Value *Trunc = Builder->CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); - return BinaryOperator::CreateAnd(Trunc, + return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), AndValue)); } @@ -876,7 +882,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { Value *New = Builder->CreateZExt(X, CI.getType()); return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - + return 0; } @@ -989,14 +995,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // If this is a constant, it can be trivially promoted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is a truncate from the dest type, we can trivially eliminate it. if (isa<TruncInst>(I) && I->getOperand(0)->getType() == Ty) return true; - + // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; @@ -1015,14 +1021,14 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // These operators can all arbitrarily be extended if their inputs can. return CanEvaluateSExtd(I->getOperand(0), Ty) && CanEvaluateSExtd(I->getOperand(1), Ty); - + //case Instruction::Shl: TODO //case Instruction::LShr: TODO - + case Instruction::Select: return CanEvaluateSExtd(I->getOperand(1), Ty) && CanEvaluateSExtd(I->getOperand(2), Ty); - + case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never // get into trouble with cyclic PHIs here because we only consider @@ -1036,24 +1042,24 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { // TODO: Can handle more cases here. break; } - + return false; } Instruction *InstCombiner::visitSExt(SExtInst &CI) { - // If this sign extend is only used by a truncate, let the truncate by - // eliminated before we try to optimize this zext. + // If this sign extend is only used by a truncate, let the truncate be + // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa<TruncInst>(CI.use_back())) return 0; - + if (Instruction *I = commonCastTransforms(CI)) return I; - - // See if we can simplify any instructions used by the input whose sole + + // See if we can simplify any instructions used by the input whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(CI)) return &CI; - + Value *Src = CI.getOperand(0); Type *SrcTy = Src->getType(), *DestTy = CI.getType(); @@ -1076,7 +1082,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // cast with the result. if (ComputeNumSignBits(Res) > DestBitSize - SrcBitSize) return ReplaceInstUsesWith(CI, Res); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), @@ -1089,7 +1095,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) { uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); uint32_t DestBitSize = DestTy->getScalarSizeInBits(); - + // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); @@ -1125,7 +1131,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { A = Builder->CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } - + return 0; } @@ -1147,7 +1153,7 @@ static Value *LookThroughFPExtensions(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0)); - + // If this value is a constant, return the constant in the smallest FP type // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. @@ -1166,14 +1172,14 @@ static Value *LookThroughFPExtensions(Value *V) { return V; // Don't try to shrink to various long double types. } - + return V; } Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well @@ -1190,7 +1196,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Type *SrcTy = OpI->getType(); Value *LHSTrunc = LookThroughFPExtensions(OpI->getOperand(0)); Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1)); - if (LHSTrunc->getType() != SrcTy && + if (LHSTrunc->getType() != SrcTy && RHSTrunc->getType() != SrcTy) { unsigned DstSize = CI.getType()->getScalarSizeInBits(); // If the source types were both smaller than the destination type of @@ -1202,10 +1208,36 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } - break; + break; + } + + // (fptrunc (fneg x)) -> (fneg (fptrunc x)) + if (BinaryOperator::isFNeg(OpI)) { + Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), + CI.getType()); + return BinaryOperator::CreateFNeg(InnerTrunc); } } - + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0)); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: { + // (fptrunc (fabs x)) -> (fabs (fptrunc x)) + Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), + CI.getType()); + Type *IntrinsicType[] = { CI.getType() }; + Function *Overload = + Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), + II->getIntrinsicID(), IntrinsicType); + + Value *Args[] = { InnerTrunc }; + return CallInst::Create(Overload, Args, II->getName()); + } + } + } + // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && @@ -1220,7 +1252,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Arg->getOperand(0)->getType()->isFloatTy()) { Function *Callee = Call->getCalledFunction(); Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", + Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf", Callee->getAttributes(), Builder->getFloatTy(), Builder->getFloatTy(), @@ -1228,15 +1260,15 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), "sqrtfcall"); ret->setAttributes(Callee->getAttributes()); - - + + // Remove the old Call. With -fmath-errno, it won't get marked readnone. ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); EraseInstFromFunction(*Call); return ret; } } - + return 0; } @@ -1254,7 +1286,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */ @@ -1268,19 +1300,19 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0)); if (OpI == 0) return commonCastTransforms(FI); - + // fptosi(sitofp(X)) --> X // fptosi(uitofp(X)) --> X // This is safe if the intermediate type has enough bits in its mantissa to // accurately represent all values of X. For example, do not do this with // i64->float->i64. This is also safe for sitofp case, because any negative - // 'X' value would cause an undefined result for the fptoui. + // 'X' value would cause an undefined result for the fptoui. if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) && OpI->getOperand(0)->getType() == FI.getType() && (int)FI.getType()->getScalarSizeInBits() <= OpI->getType()->getFPMantissaWidth()) return ReplaceInstUsesWith(FI, OpI->getOperand(0)); - + return commonCastTransforms(FI); } @@ -1296,21 +1328,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // If the source integer type is not the intptr_t type for this target, do a // trunc or zext to the intptr_t type, then inttoptr of it. This allows the // cast to be exposed to other transforms. - if (TD) { - if (CI.getOperand(0)->getType()->getScalarSizeInBits() > - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateTrunc(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } - if (CI.getOperand(0)->getType()->getScalarSizeInBits() < - TD->getPointerSizeInBits()) { - Value *P = Builder->CreateZExt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new IntToPtrInst(P, CI.getType()); - } + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() != + TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + return new IntToPtrInst(P, CI.getType()); } - + if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1320,34 +1347,32 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); - + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! if (GEP->hasAllZeroIndices()) { // Changing the cast operand is usually not a good idea but it is safe - // here because the pointer operand is being replaced with another + // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } - + // If the GEP has a single use, and the base pointer is a bitcast, and the // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. + APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0)) && - GEP->hasAllConstantIndices()) { - SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); - int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); - + GEP->accumulateConstantOffset(*TD, Offset)) { // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast<BitCastInst>(GEP->getOperand(0))->getOperand(0); Type *GEPIdxTy = cast<PointerType>(OrigBase->getType())->getElementType(); SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(GEPIdxTy, Offset, NewIndices)) { + if (FindElementAtOffset(GEPIdxTy, Offset.getSExtValue(), NewIndices)) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. @@ -1355,15 +1380,15 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { Builder->CreateInBoundsGEP(OrigBase, NewIndices) : Builder->CreateGEP(OrigBase, NewIndices); NGEP->takeName(GEP); - + if (isa<BitCastInst>(CI)) return new BitCastInst(NGEP, CI.getType()); assert(isa<PtrToIntInst>(CI)); return new PtrToIntInst(NGEP, CI.getType()); - } + } } } - + return commonCastTransforms(CI); } @@ -1371,19 +1396,15 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // If the destination integer type is not the intptr_t type for this target, // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast // to be exposed to other transforms. - if (TD) { - if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new TruncInst(P, CI.getType()); - } - if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), - TD->getIntPtrType(CI.getContext())); - return new ZExtInst(P, CI.getType()); - } + if (TD && CI.getType()->getScalarSizeInBits() != TD->getPointerSizeInBits()) { + Type *Ty = TD->getIntPtrType(CI.getContext()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); + + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), Ty); + return CastInst::CreateIntegerCast(P, CI.getType(), /*isSigned=*/false); } - + return commonPointerCastTransforms(CI); } @@ -1398,33 +1419,33 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. VectorType *SrcTy = cast<VectorType>(InVal->getType()); - + if (SrcTy->getElementType() != DestTy->getElementType()) { // The input types don't need to be identical, but for now they must be the // same size. There is no specific reason we couldn't handle things like // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten - // there yet. + // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) return 0; - + SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); } - + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. SmallVector<uint32_t, 16> ShuffleMask; Value *V2; - + if (SrcTy->getNumElements() > DestTy->getNumElements()) { // If we're shrinking the number of elements, just shuffle in the low // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) ShuffleMask.push_back(i); - + } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros @@ -1438,7 +1459,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) ShuffleMask.push_back(SrcElts); } - + return new ShuffleVectorInst(InVal, V2, ConstantDataVector::get(V2->getContext(), ShuffleMask)); @@ -1465,7 +1486,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, Type *VecEltTy) { // Undef values never contribute useful bits to the result. if (isa<UndefValue>(V)) return true; - + // If we got down to a value of the right type, we win, try inserting into the // right element. if (V->getType() == VecEltTy) { @@ -1473,15 +1494,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (Constant *C = dyn_cast<Constant>(V)) if (C->isNullValue()) return true; - + // Fail if multiple elements are inserted into this slot. if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0) return false; - + Elements[ElementIndex] = V; return true; } - + if (Constant *C = dyn_cast<Constant>(V)) { // Figure out the # elements this provides, and bitcast it or slice it up // as required. @@ -1492,7 +1513,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (NumElts == 1) return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), ElementIndex, Elements, VecEltTy); - + // Okay, this is a constant that covers multiple elements. Slice it up into // pieces and insert each element-sized piece into the vector. if (!isa<IntegerType>(C->getType())) @@ -1500,7 +1521,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, C->getType()->getPrimitiveSizeInBits())); unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits(); Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize); - + for (unsigned i = 0; i != NumElts; ++i) { Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), i*ElementSize)); @@ -1510,23 +1531,23 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, } return true; } - + if (!V->hasOneUse()) return false; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), ElementIndex, - Elements, VecEltTy); + Elements, VecEltTy); case Instruction::Or: return CollectInsertionElements(I->getOperand(0), ElementIndex, Elements, VecEltTy) && @@ -1538,11 +1559,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex, if (CI == 0) return false; if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false; unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy); - + return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift, Elements, VecEltTy); } - + } } @@ -1577,11 +1598,11 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] == 0) continue; // Unset element. - + Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); } - + return Result; } @@ -1589,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1609,11 +1633,14 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } - + // bitcast(trunc(lshr(bitcast(somevector), cst)) ConstantInt *ShAmt = 0; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), @@ -1630,8 +1657,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecTy->getPrimitiveSizeInBits() / DestWidth); VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } @@ -1654,12 +1683,12 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { PointerType *SrcPTy = cast<PointerType>(SrcTy); Type *DstElTy = DstPTy->getElementType(); Type *SrcElTy = SrcPTy->getElementType(); - + // If the address spaces don't match, don't eliminate the bitcast, which is // required for changing types. if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - + // If we are casting a alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. // There is no need to modify malloc calls because it is their bitcast that @@ -1667,14 +1696,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (AllocaInst *AI = dyn_cast<AllocaInst>(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; - + // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; - while (SrcElTy != DstElTy && + while (SrcElTy != DstElTy && isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); @@ -1687,7 +1716,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { return GetElementPtrInst::CreateInBounds(Src, Idxs); } } - + // Try to optimize int -> float bitcasts. if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) @@ -1700,7 +1729,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) } - + if (isa<IntegerType>(SrcTy)) { // If this is a cast from an integer to vector, check to see if the input // is a trunc or zext of a bitcast from vector. If so, we can replace all @@ -1713,7 +1742,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { cast<VectorType>(DestTy), *this)) return I; } - + // If the input is an 'or' instruction, we may be doing shifts and ors to // assemble the elements of the vector manually. Try to rip the code out // and replace it with insertelements. @@ -1723,18 +1752,29 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { - Value *Elem = - Builder->CreateExtractElement(Src, - Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); - return CastInst::Create(Instruction::BitCast, Elem, DestTy); + if (SrcVTy->getNumElements() == 1) { + // If our destination is not a vector, then make this a straight + // scalar-scalar cast. + if (!DestTy->isVectorTy()) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } + + // Otherwise, see if our source is an insert. If so, then use the scalar + // component directly. + if (InsertElementInst *IEI = + dyn_cast<InsertElementInst>(CI.getOperand(0))) + return CastInst::Create(Instruction::BitCast, IEI->getOperand(1), + DestTy); } } if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. - if (SVI->hasOneUse() && DestTy->isVectorTy() && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1743,9 +1783,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. - if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && + if (((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(0))) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && + ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); @@ -1755,7 +1795,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } } - + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c3f8fe..a96e754 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; using namespace PatternMatch; @@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, } } +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). static bool isHighOnes(const ConstantInt *CI) { @@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // If a magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (ArrayElementCount <= 32 || - (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { - Type *Ty; - if (ArrayElementCount <= 32) + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } } return 0; @@ -1226,6 +1260,16 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ICI.setOperand(0, NewAnd); return &ICI; } + + // Replace ((X & AndCST) > RHSV) with ((X & AndCST) != 0), if any + // bit set in (X & AndCST) will produce a result greater than RHSV. + if (ICI.getPredicate() == ICmpInst::ICMP_UGT) { + unsigned NTZ = AndCST->getValue().countTrailingZeros(); + if ((NTZ < AndCST->getBitWidth()) && + APInt::getOneBitSet(AndCST->getBitWidth(), NTZ).ugt(RHSV)) + return new ICmpInst(ICmpInst::ICMP_NE, LHSI, + Constant::getNullValue(RHS->getType())); + } } // Try to optimize things like "A[i]&42 == 0" to index computations. @@ -1263,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; } + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + break; + } + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; @@ -1294,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); + // If the shift is NSW and we compare to 0, then it is just shifting out + // sign bits, no need for an AND either. + if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); @@ -1308,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } + // If this is a signed comparison to 0 and the shift is sign preserving, + // use the shift LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && @@ -1321,6 +1397,26 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(And->getType())); } + + // Transform (icmp pred iM (shl iM %v, N), CI) + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. + // This enables to get rid of the shift in favor of a trunc which can be + // free on the target. It has the additional benefit of comparing to a + // smaller constant, which will be target friendly. + unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); + Constant *NCI = ConstantExpr::getTrunc( + ConstantExpr::getAShr(RHS, + ConstantInt::get(RHS->getType(), Amt)), + NTy); + return new ICmpInst(ICI.getPredicate(), + Builder->CreateTrunc(LHSI->getOperand(0), NTy), + NCI); + } + break; } @@ -1502,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(pred, X, NegX); } } + break; + case Instruction::Mul: + if (RHSV == 0 && BO->hasNoSignedWrap()) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // The trivial case (mul X, 0) is handled by InstSimplify + // General case : (mul X, C) != 0 iff X != 0 + // (mul X, C) == 0 iff X == 0 + if (!BOC->isZero()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Constant::getNullValue(RHS->getType())); + } + } + break; default: break; } } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 4d106fc..337cfe3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); @@ -150,26 +150,6 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI, return 0; } -/// getPointeeAlignment - Compute the minimum alignment of the value pointed -/// to by the given pointer. -static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - (CE->getOpcode() == Instruction::GetElementPtr && - cast<GEPOperator>(CE)->hasAllZeroIndices())) - return getPointeeAlignment(CE->getOperand(0), TD); - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - if (!GV->isDeclaration()) - return TD.getPreferredAlignment(GV); - - if (PointerType *PT = dyn_cast<PointerType>(V->getType())) - if (PT->getElementType()->isSized()) - return TD.getABITypeAlignment(PT->getElementType()); - - return 0; -} - Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. @@ -265,7 +245,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - if (TD) { + if (AI.getAlignment()) { // Check to see if this allocation is only modified by a memcpy/memmove from // a constant global whose alignment is equal to or exceeds that of the // allocation. If this is the case, we can change all users to use @@ -274,7 +254,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // is only subsequently read. SmallVector<Instruction *, 4> ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(), + AI.getAlignment(), TD); + if (AI.getAlignment() <= SourceAlign) { DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) @@ -820,6 +802,13 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); + // If the two stores had the same TBAA tag, preserve it. + if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa)) + if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag, + OtherStore->getMetadata(LLVMContext::MD_tbaa)))) + NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index cefe45e..173f2bf 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -37,7 +37,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. - isPowerOfTwo(PowerOf2, IC.getDataLayout())) { + isKnownToBeAPowerOfTwo(PowerOf2)) { A = IC.Builder->CreateSub(A, B); return IC.Builder->CreateShl(PowerOf2, A); } @@ -45,8 +45,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it // inexact. Similarly for <<. if (BinaryOperator *I = dyn_cast<BinaryOperator>(V)) - if (I->isLogicalShift() && - isPowerOfTwo(I->getOperand(0), IC.getDataLayout())) { + if (I->isLogicalShift() && isKnownToBeAPowerOfTwo(I->getOperand(0))) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC)) { @@ -252,24 +251,136 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return Changed ? &I : 0; } +// +// Detect pattern: +// +// log2(Y*0.5) +// +// And check for corresponding fast math flags +// + +static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) { + + if (!Op->hasOneUse()) + return; + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op); + if (!II) + return; + if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra()) + return; + Log2 = II; + + Value *OpLog2Of = II->getArgOperand(0); + if (!OpLog2Of->hasOneUse()) + return; + + Instruction *I = dyn_cast<Instruction>(OpLog2Of); + if (!I) + return; + if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra()) + return; + + ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0)); + if (CFP && CFP->isExactlyValue(0.5)) { + Y = I->getOperand(1); + return; + } + CFP = dyn_cast<ConstantFP>(I->getOperand(1)); + if (CFP && CFP->isExactlyValue(0.5)) + Y = I->getOperand(0); +} + +/// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns +/// true iff the given value is FMul or FDiv with one and only one operand +/// being a normal constant (i.e. not Zero/NaN/Infinity). +static bool isFMulOrFDivWithConstant(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I || (I->getOpcode() != Instruction::FMul && + I->getOpcode() != Instruction::FDiv)) + return false; + + ConstantFP *C0 = dyn_cast<ConstantFP>(I->getOperand(0)); + ConstantFP *C1 = dyn_cast<ConstantFP>(I->getOperand(1)); + + if (C0 && C1) + return false; + + return (C0 && C0->getValueAPF().isNormal()) || + (C1 && C1->getValueAPF().isNormal()); +} + +static bool isNormalFp(const ConstantFP *C) { + const APFloat &Flt = C->getValueAPF(); + return Flt.isNormal() && !Flt.isDenormal(); +} + +/// foldFMulConst() is a helper routine of InstCombiner::visitFMul(). +/// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand +/// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true). +/// This function is to simplify "FMulOrDiv * C" and returns the +/// resulting expression. Note that this function could return NULL in +/// case the constants cannot be folded into a normal floating-point. +/// +Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C, + Instruction *InsertBefore) { + assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); + + Value *Opnd0 = FMulOrDiv->getOperand(0); + Value *Opnd1 = FMulOrDiv->getOperand(1); + + ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0); + ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1); + + BinaryOperator *R = 0; + + // (X * C0) * C => X * (C0*C) + if (FMulOrDiv->getOpcode() == Instruction::FMul) { + Constant *F = ConstantExpr::getFMul(C1 ? C1 : C0, C); + if (isNormalFp(cast<ConstantFP>(F))) + R = BinaryOperator::CreateFMul(C1 ? Opnd0 : Opnd1, F); + } else { + if (C0) { + // (C0 / X) * C => (C0 * C) / X + ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFMul(C0, C)); + if (isNormalFp(F)) + R = BinaryOperator::CreateFDiv(F, Opnd1); + } else { + // (X / C1) * C => X * (C/C1) if C/C1 is not a denormal + ConstantFP *F = cast<ConstantFP>(ConstantExpr::getFDiv(C, C1)); + if (isNormalFp(F)) { + R = BinaryOperator::CreateFMul(Opnd0, F); + } else { + // (X / C1) * C => X / (C1/C) + Constant *F = ConstantExpr::getFDiv(C1, C); + if (isNormalFp(cast<ConstantFP>(F))) + R = BinaryOperator::CreateFDiv(Opnd0, F); + } + } + } + + if (R) { + R->setHasUnsafeAlgebra(true); + InsertNewInstWith(R, *InsertBefore); + } + + return R; +} + Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // Simplify mul instructions with a constant RHS. - if (Constant *Op1C = dyn_cast<Constant>(Op1)) { - if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) { - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' - } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) { - // As above, vector X*splat(1.0) -> X in all defined cases. - if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue())) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); - } + if (isa<Constant>(Op0)) + std::swap(Op0, Op1); + + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + + bool AllowReassociate = I.hasUnsafeAlgebra(); + // Simplify mul instructions with a constant RHS. + if (isa<Constant>(Op1)) { // Try to fold constant mul into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -278,11 +389,146 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; + + ConstantFP *C = dyn_cast<ConstantFP>(Op1); + if (C && AllowReassociate && C->getValueAPF().isNormal()) { + // Let MDC denote an expression in one of these forms: + // X * C, C/X, X/C, where C is a constant. + // + // Try to simplify "MDC * Constant" + if (isFMulOrFDivWithConstant(Op0)) { + Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I); + if (V) + return ReplaceInstUsesWith(I, V); + } + + // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C) + Instruction *FAddSub = dyn_cast<Instruction>(Op0); + if (FAddSub && + (FAddSub->getOpcode() == Instruction::FAdd || + FAddSub->getOpcode() == Instruction::FSub)) { + Value *Opnd0 = FAddSub->getOperand(0); + Value *Opnd1 = FAddSub->getOperand(1); + ConstantFP *C0 = dyn_cast<ConstantFP>(Opnd0); + ConstantFP *C1 = dyn_cast<ConstantFP>(Opnd1); + bool Swap = false; + if (C0) { + std::swap(C0, C1); + std::swap(Opnd0, Opnd1); + Swap = true; + } + + if (C1 && C1->getValueAPF().isNormal() && + isFMulOrFDivWithConstant(Opnd0)) { + Value *M1 = ConstantExpr::getFMul(C1, C); + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? + foldFMulConst(cast<Instruction>(Opnd0), C, &I) : + 0; + if (M0 && M1) { + if (Swap && FAddSub->getOpcode() == Instruction::FSub) + std::swap(M0, M1); + + Value *R = (FAddSub->getOpcode() == Instruction::FAdd) ? + BinaryOperator::CreateFAdd(M0, M1) : + BinaryOperator::CreateFSub(M0, M1); + Instruction *RI = cast<Instruction>(R); + RI->copyFastMathFlags(&I); + return RI; + } + } + } + } + } + + + // Under unsafe algebra do: + // X * log2(0.5*Y) = X*log2(Y) - X + if (I.hasUnsafeAlgebra()) { + Value *OpX = NULL; + Value *OpY = NULL; + IntrinsicInst *Log2; + detectLog2OfHalf(Op0, OpY, Log2); + if (OpY) { + OpX = Op1; + } else { + detectLog2OfHalf(Op1, OpY, Log2); + if (OpY) { + OpX = Op0; + } + } + // if pattern detected emit alternate sequence + if (OpX && OpY) { + Log2->setArgOperand(0, OpY); + Value *FMulVal = Builder->CreateFMul(OpX, Log2); + Instruction *FMul = cast<Instruction>(FMulVal); + FMul->copyFastMathFlags(Log2); + Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX); + FSub->copyFastMathFlags(Log2); + return FSub; + } } - if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(Op1)) - return BinaryOperator::CreateFMul(Op0v, Op1v); + // Handle symmetric situation in a 2-iteration loop + Value *Opnd0 = Op0; + Value *Opnd1 = Op1; + for (int i = 0; i < 2; i++) { + bool IgnoreZeroSign = I.hasNoSignedZeros(); + if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { + Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); + Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); + + // -X * -Y => X*Y + if (N1) + return BinaryOperator::CreateFMul(N0, N1); + + if (Opnd0->hasOneUse()) { + // -X * Y => -(X*Y) (Promote negation as high as possible) + Value *T = Builder->CreateFMul(N0, Opnd1); + cast<Instruction>(T)->setDebugLoc(I.getDebugLoc()); + Instruction *Neg = BinaryOperator::CreateFNeg(T); + if (I.getFastMathFlags().any()) { + cast<Instruction>(T)->copyFastMathFlags(&I); + Neg->copyFastMathFlags(&I); + } + return Neg; + } + } + + // (X*Y) * X => (X*X) * Y where Y != X + // The purpose is two-fold: + // 1) to form a power expression (of X). + // 2) potentially shorten the critical path: After transformation, the + // latency of the instruction Y is amortized by the expression of X*X, + // and therefore Y is in a "less critical" position compared to what it + // was before the transformation. + // + if (AllowReassociate) { + Value *Opnd0_0, *Opnd0_1; + if (Opnd0->hasOneUse() && + match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { + Value *Y = 0; + if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) + Y = Opnd0_1; + else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) + Y = Opnd0_0; + + if (Y) { + Instruction *T = cast<Instruction>(Builder->CreateFMul(Opnd1, Opnd1)); + T->copyFastMathFlags(&I); + T->setDebugLoc(I.getDebugLoc()); + + Instruction *R = BinaryOperator::CreateFMul(T, Y); + R->copyFastMathFlags(&I); + return R; + } + } + } + + if (!isa<Constant>(Op1)) + std::swap(Opnd0, Opnd1); + else + break; + } return Changed ? &I : 0; } @@ -567,21 +813,140 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return 0; } +/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special +/// FP value and: +/// 1) 1/C is exact, or +/// 2) reciprocal is allowed. +/// If the convertion was successful, the simplified expression "X * 1/C" is +/// returned; otherwise, NULL is returned. +/// +static Instruction *CvtFDivConstToReciprocal(Value *Dividend, + ConstantFP *Divisor, + bool AllowReciprocal) { + const APFloat &FpVal = Divisor->getValueAPF(); + APFloat Reciprocal(FpVal.getSemantics()); + bool Cvt = FpVal.getExactInverse(&Reciprocal); + + if (!Cvt && AllowReciprocal && FpVal.isNormal()) { + Reciprocal = APFloat(FpVal.getSemantics(), 1.0f); + (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven); + Cvt = !Reciprocal.isDenormal(); + } + + if (!Cvt) + return 0; + + ConstantFP *R; + R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Dividend, R); +} + Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + bool AllowReassociate = I.hasUnsafeAlgebra(); + bool AllowReciprocal = I.hasAllowReciprocal(); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { - const APFloat &Op1F = Op1C->getValueAPF(); - - // If the divisor has an exact multiplicative inverse we can turn the fdiv - // into a cheaper fmul. - APFloat Reciprocal(Op1F.getSemantics()); - if (Op1F.getExactInverse(&Reciprocal)) { - ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); - return BinaryOperator::CreateFMul(Op0, RFP); + if (AllowReassociate) { + ConstantFP *C1 = 0; + ConstantFP *C2 = Op1C; + Value *X; + Instruction *Res = 0; + + if (match(Op0, m_FMul(m_Value(X), m_ConstantFP(C1)))) { + // (X*C1)/C2 => X * (C1/C2) + // + Constant *C = ConstantExpr::getFDiv(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) + Res = BinaryOperator::CreateFMul(X, C); + } else if (match(Op0, m_FDiv(m_Value(X), m_ConstantFP(C1)))) { + // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed] + // + Constant *C = ConstantExpr::getFMul(C1, C2); + const APFloat &F = cast<ConstantFP>(C)->getValueAPF(); + if (F.isNormal() && !F.isDenormal()) { + Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), + AllowReciprocal); + if (!Res) + Res = BinaryOperator::CreateFDiv(X, C); + } + } + + if (Res) { + Res->setFastMathFlags(I.getFastMathFlags()); + return Res; + } + } + + // X / C => X * 1/C + if (Instruction *T = CvtFDivConstToReciprocal(Op0, Op1C, AllowReciprocal)) + return T; + + return 0; + } + + if (AllowReassociate && isa<ConstantFP>(Op0)) { + ConstantFP *C1 = cast<ConstantFP>(Op0), *C2; + Constant *Fold = 0; + Value *X; + bool CreateDiv = true; + + // C1 / (X*C2) => (C1/C2) / X + if (match(Op1, m_FMul(m_Value(X), m_ConstantFP(C2)))) + Fold = ConstantExpr::getFDiv(C1, C2); + else if (match(Op1, m_FDiv(m_Value(X), m_ConstantFP(C2)))) { + // C1 / (X/C2) => (C1*C2) / X + Fold = ConstantExpr::getFMul(C1, C2); + } else if (match(Op1, m_FDiv(m_ConstantFP(C2), m_Value(X)))) { + // C1 / (C2/X) => (C1/C2) * X + Fold = ConstantExpr::getFDiv(C1, C2); + CreateDiv = false; + } + + if (Fold) { + const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF(); + if (FoldC.isNormal() && !FoldC.isDenormal()) { + Instruction *R = CreateDiv ? + BinaryOperator::CreateFDiv(Fold, X) : + BinaryOperator::CreateFMul(X, Fold); + R->setFastMathFlags(I.getFastMathFlags()); + return R; + } + } + return 0; + } + + if (AllowReassociate) { + Value *X, *Y; + Value *NewInst = 0; + Instruction *SimpR = 0; + + if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { + // (X/Y) / Z => X / (Y*Z) + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op1)) { + NewInst = Builder->CreateFMul(Y, Op1); + SimpR = BinaryOperator::CreateFDiv(X, NewInst); + } + } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) { + // Z / (X/Y) => Z*Y / X + // + if (!isa<ConstantFP>(Y) || !isa<ConstantFP>(Op0)) { + NewInst = Builder->CreateFMul(Op0, Y); + SimpR = BinaryOperator::CreateFDiv(NewInst, X); + } + } + + if (NewInst) { + if (Instruction *T = dyn_cast<Instruction>(NewInst)) + T->setDebugLoc(I.getDebugLoc()); + SimpR->setFastMathFlags(I.getFastMathFlags()); + return SimpR; } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp index de9c77e..b0a998c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/DataLayout.h" using namespace llvm; /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index a2d4c88..121aa1f 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/Support/PatternMatch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // If this is a non-volatile load or a cast from the same type, // merge. if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + Type *FIOpndTy = FI->getOperand(0)->getType(); + if (TI->getOperand(0)->getType() != FIOpndTy) return 0; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); - if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != - FI->getOperand(0)->getType()->getVectorNumElements()) + if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || + CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) return 0; } else { return 0; // unknown unary op. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 57021f1..8cf76e5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { I.setOperand(1, Rem); return &I; } - + return 0; } @@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always evaluate constants shifted. if (isa<Constant>(V)) return true; - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; - + // If this is the opposite shift, we can directly reuse the input of the shift // if the needed bits are already zero in the input. This allows us to reuse // the value which means that we don't care if the shift has multiple uses. @@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, return CanEvaluateTruncated(I->getOperand(0), Ty); } #endif - + } } - + // We can't mutate something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. if (!I->hasOneUse()) return false; - + switch (I->getOpcode()) { default: return false; case Instruction::And: @@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted. return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) && CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC); - + case Instruction::Shl: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); @@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). if (isLeftShift) return true; - + // We can always turn shl(c)+shr(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't @@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast<ConstantInt>(I->getOperand(1)); if (CI == 0) return false; - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; - + // We can always turn lshr(c)+shl(c) -> and(c2). if (CI->getValue() == NumBits) return true; - + unsigned TypeWidth = I->getType()->getScalarSizeInBits(); // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't @@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) return true; } - + return false; } case Instruction::Select: { @@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, return false; return true; } - } + } } /// GetShiftedValue - When CanEvaluateShifted returned true for an expression, @@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, IC.getTargetLibraryInfo()); return V; } - + Instruction *I = cast<Instruction>(V); IC.Worklist.Add(I); @@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC)); I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); return I; - + case Instruction::Shl: { BinaryOperator *BO = cast<BinaryOperator>(I); unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); @@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setHasNoSignedWrap(false); return I; } - + // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have // zeros. if (CI->getValue() == NumBits) { @@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, unsigned TypeWidth = BO->getType()->getScalarSizeInBits(); // We only accept shifts-by-a-constant in CanEvaluateShifted. ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); - + // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) { // If this is oversized composite shift, then unsigned shifts get 0. unsigned NewShAmt = NumBits+CI->getZExtValue(); if (NewShAmt >= TypeWidth) return Constant::getNullValue(BO->getType()); - + BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt)); BO->setIsExact(false); return I; } - + // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have // zeros. if (CI->getValue() == NumBits) { @@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, } return V; } - + // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that // the and won't be needed. assert(CI->getZExtValue() > NumBits); @@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, BO->setIsExact(false); return BO; } - + case Instruction::Select: I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC)); I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC)); @@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, NumBits, isLeftShift, IC)); return PN; } - } + } } @@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; - - + + // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); - - return ReplaceInstUsesWith(I, + + return ReplaceInstUsesWith(I, GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); } - - - // See if we can simplify any instructions used by the instruction whose sole + + + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - + // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate // a signed shift. // @@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } - + // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0)) if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), ConstantExpr::getShl(BOOp, Op1)); - + // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; - + // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2)) if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) { Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0)); @@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // require that the input operand is a shift-by-constant so that we have // confidence that the shifts will get folded together. We could do this // xform in more cases, but it is unlikely to be profitable. - if (TrOp && I.isLogicalShift() && TrOp->isShift() && + if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa<ConstantInt>(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); @@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, unsigned SrcSize = TrOp->getType()->getScalarSizeInBits(); unsigned DstSize = TI->getType()->getScalarSizeInBits(); APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize)); - + // The mask we constructed says what the trunc would do if occurring // between the shifts. We want to know the effect *after* the second // shift. We know that it is a logical shift by a constant, so adjust the @@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return new TruncInst(And, I.getType()); } } - + if (Op0->hasOneUse()) { if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) Value *Op0BOOp1 = Op0BO->getOperand(1); if (isLeftShift && Op0BOOp1->hasOneUse() && - match(Op0BOOp1, - m_And(m_Shr(m_Value(V1), m_Specific(Op1)), - m_ConstantInt(CC))) && - cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { + match(Op0BOOp1, + m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), + m_ConstantInt(CC)))) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); @@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } - + // FALL THROUGH. case Instruction::Sub: { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) @@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } - + // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && match(Op0BO->getOperand(0), - m_And(m_Shr(m_Value(V1), m_Value(V2)), - m_ConstantInt(CC))) && V2 == Op1 && - cast<BinaryOperator>(Op0BO->getOperand(0)) - ->getOperand(0)->hasOneUse()) { + m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), + m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), V1->getName()+".mask"); - + return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } - + break; } } - - + + // If the operand is an bitwise operator with a constant RHS, and the // shift is the only use, we can pull it out of the shift. if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) { bool isValid = true; // Valid only for And, Or, Xor bool highBitSet = false; // Transform if high bit of constant set? - + switch (Op0BO->getOpcode()) { default: isValid = false; break; // Do not perform transform! case Instruction::Add: @@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, highBitSet = true; break; } - + // If this is a signed shift right, and the high bit is modified // by the logical operation, do not perform the transformation. // The highBitSet boolean indicates the value of the high bit of @@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // if (isValid && I.getOpcode() == Instruction::AShr) isValid = Op0C->getValue()[TypeBits-1] == highBitSet; - + if (isValid) { Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - + Value *NewShift = Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); - + return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS); } } } } - + // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0); if (ShiftOp && !ShiftOp->isShift()) ShiftOp = 0; - + if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) { // This is a constant shift of a constant shift. Be careful about hiding @@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. Value *X = ShiftOp->getOperand(0); - + IntegerType *Ty = cast<IntegerType>(I.getType()); - + // Check for (X << c1) << c2 and (X >> c1) >> c2 if (I.getOpcode() == ShiftOp->getOpcode()) { uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift. @@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } - + return BinaryOperator::Create(I.getOpcode(), X, ConstantInt::get(Ty, AmtSum)); } - + if (ShiftAmt1 == ShiftAmt2) { // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr && @@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewLShr; } Value *Shift = Builder->CreateLShr(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); @@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, return NewShl; } Value *Shift = Builder->CreateShl(X, ShiftDiffCst); - + APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); return BinaryOperator::CreateAnd(Shift, ConstantInt::get(I.getContext(),Mask)); } - + // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However, // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits. if (I.getOpcode() == Instruction::AShr && @@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), TD)) return ReplaceInstUsesWith(I, V); - + if (Instruction *V = commonShiftTransforms(I)) return V; - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the shifted-out value is known-zero, then this is a NUW shift. - if (!I.hasNoUnsignedWrap() && + if (!I.hasNoUnsignedWrap() && MaskedValueIsZero(I.getOperand(0), APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) { I.setHasNoUnsignedWrap(); return &I; } - + // If the shifted out value is all signbits, this is a NSW shift. if (!I.hasNoSignedWrap() && ComputeNumSignBits(I.getOperand(0)) > ShAmt) { @@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return 0; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { @@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - + if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); @@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return new ZExtInst(Cmp, II->getType()); } } - + // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; - } + } } - + return 0; } @@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (Instruction *R = commonShiftTransforms(I)) return R; - + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { unsigned ShAmt = Op1C->getZExtValue(); - + // If the input is a SHL by the same constant (ashr (shl X, C), C), then we // have a sign-extend idiom. Value *X; @@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // If the shifted-out value is known-zero, then this is an exact shift. - if (!I.isExact() && + if (!I.isExact() && MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){ I.setIsExact(); return &I; } - } - + } + // See if we can turn a signed shr into an unsigned shr. if (MaskedValueIsZero(Op0, APInt::getSignBit(I.getType()->getScalarSizeInBits()))) return BinaryOperator::CreateLShr(Op0, Op1); - + // Arithmetic shifting an all-sign-bit value is a no-op. unsigned NumSignBits = ComputeNumSignBits(Op0); if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - + return 0; } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 602b203..8add1ea 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -14,17 +14,18 @@ #include "InstCombine.h" -#include "llvm/DataLayout.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace llvm::PatternMatch; - -/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. -static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, +static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, APInt Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -53,8 +54,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, + + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, 0); if (V == 0) return false; if (V == &Inst) return true; @@ -65,7 +66,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { /// SimplifyDemandedBits - This form of SimplifyDemandedBits simplifies the /// specified instruction operand if possible, updating it in place. It returns /// true if it made any change and false otherwise. -bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, +bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, @@ -86,7 +87,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, /// to be one in the expression. KnownZero contains all the bits that are known /// to be zero in the expression. These are provided to potentially allow the /// caller (which might recursively be SimplifyDemandedBits itself) to simplify -/// the expression. KnownOne and KnownZero always follow the invariant that +/// the expression. KnownOne and KnownZero always follow the invariant that /// KnownOne & KnownZero == 0. That is, a bit can't be both 1 and 0. Note that /// the bits in KnownOne and KnownZero may only be accurate for those bits set /// in DemandedMask. Note also that the bitwidth of V, DemandedMask, KnownZero @@ -133,10 +134,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; return UndefValue::get(VTy); } - + if (Depth == 6) // Limit search depth. return 0; - + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); @@ -158,61 +159,74 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If either the LHS or the RHS are Zero, the result is zero. ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); - + // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == (DemandedMask & ~LHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == (DemandedMask & ~RHSKnownZero)) return I->getOperand(1); - + // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) return Constant::getNullValue(VTy); - + } else if (I->getOpcode() == Instruction::Or) { // We can simplify (X|Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - + // If either the LHS or the RHS are One, the result is One. ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); - + // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == (DemandedMask & ~LHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == (DemandedMask & ~RHSKnownOne)) return I->getOperand(1); - + // If all of the potentially set bits on one side are known to be set on // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == (DemandedMask & (~RHSKnownZero))) return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == (DemandedMask & (~LHSKnownZero))) return I->getOperand(1); + } else if (I->getOpcode() == Instruction::Xor) { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if ((DemandedMask & RHSKnownZero) == DemandedMask) + return I->getOperand(0); + if ((DemandedMask & LHSKnownZero) == DemandedMask) + return I->getOperand(1); } - + // Compute the KnownZero/KnownOne bits to simplify things downstream. ComputeMaskedBits(I, KnownZero, KnownOne, Depth); return 0; } - + // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the // operands. This allows visitTruncInst (for example) to simplify the // operand of a trunc without duplicating all the logic below. if (Depth == 0 && !V->hasOneUse()) DemandedMask = APInt::getAllOnesValue(BitWidth); - + switch (I->getOpcode()) { default: ComputeMaskedBits(I, KnownZero, KnownOne, Depth); @@ -224,26 +238,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero, LHSKnownZero, LHSKnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == + if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == (DemandedMask & ~LHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == + if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == (DemandedMask & ~RHSKnownZero)) return I->getOperand(1); - + // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) return Constant::getNullValue(VTy); - + // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) return I; - + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne = RHSKnownOne & LHSKnownOne; // Output known-0 are known to be clear if zero in either the LHS | RHS. @@ -251,36 +265,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Or: // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne, LHSKnownZero, LHSKnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == + if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == (DemandedMask & ~LHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == + if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == (DemandedMask & ~RHSKnownOne)) return I->getOperand(1); // If all of the potentially set bits on one side are known to be set on // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == + if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == (DemandedMask & (~RHSKnownZero))) return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == + if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == (DemandedMask & (~LHSKnownZero))) return I->getOperand(1); - + // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - + // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero = RHSKnownZero & LHSKnownZero; // Output known-1 are known to be set if set in either the LHS | RHS. @@ -289,34 +303,34 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::Xor: { if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if ((DemandedMask & RHSKnownZero) == DemandedMask) return I->getOperand(0); if ((DemandedMask & LHSKnownZero) == DemandedMask) return I->getOperand(1); - + // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = + Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); return InsertNewInstWith(Or, *I); } - + // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { // all known if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { Constant *AndC = Constant::getIntegerValue(VTy, @@ -325,12 +339,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return InsertNewInstWith(And, *I); } } - + // If the RHS is a constant, see if we can simplify it. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - + // If our LHS is an 'and' and if it has one use, and if any of the bits we // are flipping are known to be set, then the xor is just resetting those // bits to zero. We can just knock out bits from the 'and' and the 'xor', @@ -343,12 +357,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); - + Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); Instruction *NewAnd = BinaryOperator::CreateAnd(I->getOperand(0), AndC); InsertNewInstWith(NewAnd, *I); - + Constant *XorC = ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); Instruction *NewXor = BinaryOperator::CreateXor(NewAnd, XorC); @@ -364,17 +378,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::Select: if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, RHSKnownOne, Depth+1) || - SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, + SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); - + assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); + assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || ShrinkDemandedConstant(I, 2, DemandedMask)) return I; - + // Only known if known in both the LHS and RHS. KnownOne = RHSKnownOne & LHSKnownOne; KnownZero = RHSKnownZero & LHSKnownZero; @@ -384,13 +398,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.zext(truncBf); KnownZero = KnownZero.zext(truncBf); KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; DemandedMask = DemandedMask.trunc(BitWidth); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -413,12 +427,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero, KnownOne, Depth+1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); break; case Instruction::ZExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - + DemandedMask = DemandedMask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); @@ -428,7 +442,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, DemandedMask = DemandedMask.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // The top bits are known to be zero. KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; @@ -436,8 +450,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, case Instruction::SExt: { // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - APInt InputDemandedBits = DemandedMask & + + APInt InputDemandedBits = DemandedMask & APInt::getLowBitsSet(BitWidth, SrcBitWidth); APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth)); @@ -445,7 +459,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bit is demanded. if ((NewBits & DemandedMask) != 0) InputDemandedBits.setBit(SrcBitWidth-1); - + InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); @@ -455,8 +469,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, InputDemandedBits = InputDemandedBits.zext(BitWidth); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + // If the sign bit of the input is known set or clear, then we know the // top bits of the result. @@ -476,7 +490,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // are not demanded, then the add doesn't demand them from its input // either. unsigned NLZ = DemandedMask.countLeadingZeros(); - + // If there is a constant on the RHS, there are a variety of xformations // we can do. if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -484,13 +498,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // won't work if the RHS is zero. if (RHS->isZero()) break; - + // If the top bit of the output is demanded, demand everything from the // input. Otherwise, we demand all the input bits except NLZ top bits. APInt InDemandedBits(APInt::getLowBitsSet(BitWidth, BitWidth - NLZ)); // Find information about known zero/one bits in the input. - if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, + if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits, LHSKnownZero, LHSKnownOne, Depth+1)) return I; @@ -498,11 +512,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // the constant. if (ShrinkDemandedConstant(I, 1, InDemandedBits)) return I; - + // Avoid excess work. if (LHSKnownZero == 0 && LHSKnownOne == 0) break; - + // Turn it into OR if input bits are zero. if ((LHSKnownZero & RHS->getValue()) == RHS->getValue()) { Instruction *Or = @@ -510,26 +524,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, I->getName()); return InsertNewInstWith(Or, *I); } - + // We can say something about the output known-zero and known-one bits, // depending on potential carries from the input constant and the // unknowns. For example if the LHS is known to have at most the 0x0F0F0 // bits set and the RHS constant is 0x01001, then we know we have a known // one mask of 0x00001 and a known zero mask of 0xE0F0E. - + // To compute this, we first compute the potential carry bits. These are // the bits which may be modified. I'm not aware of a better way to do // this scan. const APInt &RHSVal = RHS->getValue(); APInt CarryBits((~LHSKnownZero + RHSVal) ^ (~LHSKnownZero ^ RHSVal)); - + // Now that we know which bits have carries, compute the known-1/0 sets. - + // Bits are known one if they are known zero in one operand and one in the // other, and there is no input carry. - KnownOne = ((LHSKnownZero & RHSVal) | + KnownOne = ((LHSKnownZero & RHSVal) | (LHSKnownOne & ~RHSVal)) & ~CarryBits; - + // Bits are known zero if they are known zero in both operands and there // is no input carry. KnownZero = LHSKnownZero & ~RHSVal & ~CarryBits; @@ -580,17 +594,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { + { + Value *VarX; ConstantInt *C1; + if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { + Instruction *Shr = cast<Instruction>(I->getOperand(0)); + Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, + KnownZero, KnownOne); + if (R) + return R; + } + } + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt)); - + // If the shift is NUW/NSW, then it does demand the high bits. ShlOperator *IOp = cast<ShlOperator>(I); if (IOp->hasNoSignedWrap()) DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - - if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, + + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); @@ -605,15 +630,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // For a logical shift right if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - + // Unsigned shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - + // If the shift is exact, then it does demand the low bits (and knows that // they are zero). if (cast<LShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -637,28 +662,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), I->getOperand(1), I->getName()); return InsertNewInstWith(NewVal, *I); - } + } // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. if (DemandedMask.isSignBit()) return I->getOperand(0); - + if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - + // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); // If any of the "high bits" are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.setBit(BitWidth-1); - + // If the shift is exact, then it does demand the low bits (and knows that // they are zero). if (cast<AShrOperator>(I)->isExact()) DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - + if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero, KnownOne, Depth+1)) return I; @@ -667,15 +692,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - + // Handle the sign bits. APInt SignBit(APInt::getSignBit(BitWidth)); // Adjust to where it is now in the mask. - SignBit = APIntOps::lshr(SignBit, ShiftAmt); - + SignBit = APIntOps::lshr(SignBit, ShiftAmt); + // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || + if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || (HighBits & ~DemandedMask) == HighBits) { // Perform the logical shift right. BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), @@ -718,7 +743,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) KnownOne |= ~LowBits; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } @@ -756,7 +781,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // just shift the input byte into position to eliminate the bswap. unsigned NLZ = DemandedMask.countLeadingZeros(); unsigned NTZ = DemandedMask.countTrailingZeros(); - + // Round NTZ down to the next byte. If we have 11 trailing zeros, then // we need all the bits down to bit 8. Likewise, round NLZ. If we // have 14 leading zeros, round to 8. @@ -766,7 +791,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (BitWidth-NLZ-NTZ == 8) { unsigned ResultBit = NTZ; unsigned InputBit = BitWidth-NTZ-8; - + // Replace this with either a left or right shift to get the byte into // the right place. Instruction *NewVal; @@ -779,7 +804,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, NewVal->takeName(I); return InsertNewInstWith(NewVal, *I); } - + // TODO: Could compute known zero/one bits based on the input. break; } @@ -792,7 +817,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, ComputeMaskedBits(V, KnownZero, KnownOne, Depth); break; } - + // If the client is only demanding bits that we know, return the known // constant. if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) @@ -800,6 +825,81 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; } +/// Helper routine of SimplifyDemandedUseBits. It tries to simplify +/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into +/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign +/// of "C2-C1". +/// +/// Suppose E1 and E2 are generally different in bits S={bm, bm+1, +/// ..., bn}, without considering the specific value X is holding. +/// This transformation is legal iff one of following conditions is hold: +/// 1) All the bit in S are 0, in this case E1 == E2. +/// 2) We don't care those bits in S, per the input DemandedMask. +/// 3) Combination of 1) and 2). Some bits in S are 0, and we don't care the +/// rest bits. +/// +/// Currently we only test condition 2). +/// +/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was +/// not successful. +Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, + Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) { + + unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue(); + unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue(); + + KnownOne.clearAllBits(); + KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1); + KnownZero &= DemandedMask; + + if (ShlAmt == 0 || ShrAmt == 0) + return 0; + + Value *VarX = Shr->getOperand(0); + Type *Ty = VarX->getType(); + + APInt BitMask1(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); + APInt BitMask2(APInt::getAllOnesValue(Ty->getIntegerBitWidth())); + + bool isLshr = (Shr->getOpcode() == Instruction::LShr); + BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) : + (BitMask1.ashr(ShrAmt) << ShlAmt); + + if (ShrAmt <= ShlAmt) { + BitMask2 <<= (ShlAmt - ShrAmt); + } else { + BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt): + BitMask2.ashr(ShrAmt - ShlAmt); + } + + // Check if condition-2 (see the comment to this function) is satified. + if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) { + if (ShrAmt == ShlAmt) + return VarX; + + if (!Shr->hasOneUse()) + return 0; + + BinaryOperator *New; + if (ShrAmt < ShlAmt) { + Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt); + New = BinaryOperator::CreateShl(VarX, Amt); + BinaryOperator *Orig = cast<BinaryOperator>(Shl); + New->setHasNoSignedWrap(Orig->hasNoSignedWrap()); + New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap()); + } else { + Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt); + New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) : + BinaryOperator::CreateAShr(VarX, Amt); + if (cast<BinaryOperator>(Shr)->isExact()) + New->setIsExact(true); + } + + return InsertNewInstWith(New, *Shl); + } + + return 0; +} /// SimplifyDemandedVectorElts - The specified value produces a vector with /// any number of elements. DemandedElts contains the set of elements that are @@ -821,14 +921,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = EltMask; return 0; } - + if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; return UndefValue::get(V->getType()); } UndefElts = 0; - + // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential. if (Constant *C = dyn_cast<Constant>(V)) { // Check if this is identity. If so, return 0 since we are not simplifying @@ -838,7 +938,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Type *EltTy = cast<VectorType>(V->getType())->getElementType(); Constant *Undef = UndefValue::get(EltTy); - + SmallVector<Constant*, 16> Elts; for (unsigned i = 0; i != VWidth; ++i) { if (!DemandedElts[i]) { // If not demanded, set to undef. @@ -846,10 +946,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts.setBit(i); continue; } - + Constant *Elt = C->getAggregateElement(i); if (Elt == 0) return 0; - + if (isa<UndefValue>(Elt)) { // Already undef. Elts.push_back(Undef); UndefElts.setBit(i); @@ -857,12 +957,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Elts.push_back(Elt); } } - + // If we changed the constant, return it. Constant *NewCV = ConstantVector::get(Elts); return NewCV != C ? NewCV : 0; } - + // Limit search depth. if (Depth == 10) return 0; @@ -881,16 +981,16 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // Conservatively assume that all elements are needed. DemandedElts = EltMask; } - + Instruction *I = dyn_cast<Instruction>(V); if (!I) return 0; // Only analyze instructions. - + bool MadeChange = false; APInt UndefElts2(VWidth, 0); Value *TmpV; switch (I->getOpcode()) { default: break; - + case Instruction::InsertElement: { // If this is a variable index, we don't know which element it overwrites. // demand exactly the same input as we produce. @@ -903,7 +1003,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; } - + // If this is inserting an element that isn't demanded, remove this // insertelement. unsigned IdxNo = Idx->getZExtValue(); @@ -911,7 +1011,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Worklist.Add(I); return I->getOperand(0); } - + // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. APInt DemandedElts2 = DemandedElts; @@ -1007,7 +1107,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, UndefElts2, Depth+1); if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } - + // Output elements are undefined if both are undefined. UndefElts &= UndefElts2; break; @@ -1028,7 +1128,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } else if (VWidth > InVWidth) { // Untested so far. break; - + // If there are more elements in the result than there are in the source, // then an input element is live if any of the corresponding output // elements are live. @@ -1040,7 +1140,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } else { // Untested so far. break; - + // If there are more elements in the source than there are in the result, // then an input element is live if the corresponding output element is // live. @@ -1049,7 +1149,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (DemandedElts[InIdx/Ratio]) InputDemandedElts.setBit(InIdx); } - + // div/rem demand all inputs, because they don't want divide by zero. TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts, UndefElts2, Depth+1); @@ -1057,7 +1157,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, I->setOperand(0, TmpV); MadeChange = true; } - + UndefElts = UndefElts2; if (VWidth > InVWidth) { llvm_unreachable("Unimp"); @@ -1092,7 +1192,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts, UndefElts2, Depth+1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } - + // Output elements are undefined if both are undefined. Consider things // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; @@ -1103,13 +1203,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts, Depth+1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } break; - + case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); if (!II) break; switch (II->getIntrinsicID()) { default: break; - + // Binary vector operations that work column-wise. A dest element is a // function of the corresponding input elements from the two inputs. case Intrinsic::x86_sse_sub_ss: @@ -1140,11 +1240,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Value *LHS = II->getArgOperand(0); Value *RHS = II->getArgOperand(1); // Extract the element as scalars. - LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, + LHS = InsertNewInstWith(ExtractElementInst::Create(LHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); RHS = InsertNewInstWith(ExtractElementInst::Create(RHS, ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II); - + switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: @@ -1158,7 +1258,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, II->getName()), *II); break; } - + Instruction *New = InsertElementInst::Create( UndefValue::get(II->getType()), TmpV, @@ -1166,9 +1266,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, II->getName()); InsertNewInstWith(New, *II); return New; - } + } } - + // Output elements are undefined if both are undefined. Consider things // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dd7ea14..4f71db1 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstCombine.h" +#include "llvm/Support/PatternMatch.h" using namespace llvm; +using namespace PatternMatch; /// CheapToScalarize - Return true if the value is cheaper to scalarize than it /// is to leave as a vector operation. isConstant indicates whether we're @@ -92,6 +94,13 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); } + // Extract a value from a vector add operation with a constant zero. + Value *Val = 0; Constant *Con = 0; + if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return FindScalarElement(Val, EltNo); + } + // Otherwise, we don't know. return 0; } @@ -295,12 +304,12 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask, Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); return V; } - + if (isa<ConstantAggregateZero>(V)) { Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); return V; } - + if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { // If this is an insert of an extract from some other vector, include it. Value *VecOp = IEI->getOperand(0); @@ -595,12 +604,12 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // ShuffleVectorInst is equivalent to the original one. for (unsigned i = 0; i < VWidth; ++i) { int eltMask; - if (Mask[i] == -1) { + if (Mask[i] < 0) { // This element is an undef value. eltMask = -1; } else if (Mask[i] < (int)LHSWidth) { // This element is from left hand side vector operand. - // + // // If LHS is going to be replaced (case 1, 2, or 4), calculate the // new mask value for the element. if (newLHS != LHS) { @@ -609,8 +618,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // with a -1 mask value. if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) eltMask = -1; - } - else + } else eltMask = Mask[i]; } else { // This element is from right hand side vector operand @@ -630,8 +638,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { && "should have been check above"); eltMask = -1; } - } - else + } else eltMask = Mask[i]-LHSWidth; // If LHS's width is changed, shift the mask value accordingly. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h index ea654ae..49efce5 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -11,28 +11,28 @@ #define INSTCOMBINE_WORKLIST_H #define DEBUG_TYPE "instcombine" -#include "llvm/Instruction.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" namespace llvm { - + /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector<Instruction*, 256> Worklist; DenseMap<Instruction*, unsigned> WorklistMap; - + void operator=(const InstCombineWorklist&RHS) LLVM_DELETED_FUNCTION; InstCombineWorklist(const InstCombineWorklist&) LLVM_DELETED_FUNCTION; public: InstCombineWorklist() {} - + bool isEmpty() const { return Worklist.empty(); } - + /// Add - Add the specified instruction to the worklist if it isn't already /// in it. void Add(Instruction *I) { @@ -41,12 +41,12 @@ public: Worklist.push_back(I); } } - + void AddValue(Value *V) { if (Instruction *I = dyn_cast<Instruction>(V)) Add(I); } - + /// AddInitialGroup - Add the specified batch of stuff in reverse order. /// which should only be done when the worklist is empty and when the group /// has no duplicates. @@ -61,25 +61,25 @@ public: Worklist.push_back(I); } } - + // Remove - remove I from the worklist if it exists. void Remove(Instruction *I) { DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. - + // Don't bother moving everything down, just null out the slot. Worklist[It->second] = 0; - + WorklistMap.erase(It); } - + Instruction *RemoveOne() { Instruction *I = Worklist.back(); Worklist.pop_back(); WorklistMap.erase(I); return I; } - + /// AddUsersToWorkList - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. @@ -89,18 +89,18 @@ public: UI != UE; ++UI) Add(cast<Instruction>(*UI)); } - - + + /// Zap - check that the worklist is empty and nuke the backing store for /// the map if it is large. void Zap() { assert(WorklistMap.empty() && "Worklist empty, but map not?"); - + // Do an explicit clear, this shrinks the map if needed. WorklistMap.clear(); } }; - + } // end namespace llvm. #endif diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 9a46f25..c6115e3 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -36,22 +36,23 @@ #define DEBUG_TYPE "instcombine" #include "llvm/Transforms/Scalar.h" #include "InstCombine.h" -#include "llvm/IntrinsicInst.h" +#include "llvm-c/Initialization.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm-c/Initialization.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <climits> using namespace llvm; @@ -65,6 +66,11 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc , "Number of reassociations"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); + // Initialization Routines void llvm::initializeInstCombine(PassRegistry &Registry) { initializeInstCombinerPass(Registry); @@ -156,6 +162,21 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { return !Overflow; } +/// Conservatively clears subclassOptionalData after a reassociation or +/// commutation. We preserve fast-math flags when applicable as they can be +/// preserved. +static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { + FPMathOperator *FPMO = dyn_cast<FPMathOperator>(&I); + if (!FPMO) { + I.clearSubclassOptionalData(); + return; + } + + FastMathFlags FMF = I.getFastMathFlags(); + I.clearSubclassOptionalData(); + I.setFastMathFlags(FMF); +} + /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative: // @@ -213,7 +234,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.clearSubclassOptionalData(); I.setHasNoSignedWrap(true); } else { - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); } Changed = true; @@ -235,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, C); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -257,7 +278,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, B); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -277,7 +298,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, V); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; ++NumReassoc; continue; @@ -304,7 +325,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { I.setOperand(1, Folded); // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. - I.clearSubclassOptionalData(); + ClearSubclassDataAfterReassociation(I); Changed = true; continue; @@ -510,8 +531,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -Value *InstCombiner::dyn_castFNegVal(Value *V) const { - if (BinaryOperator::isFNeg(V)) +Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { + if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. @@ -1303,17 +1324,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { + APInt Offset(TD ? TD->getPointerSizeInBits() : 1, 0); if (TD && - !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + !isa<BitCastInst>(BCI->getOperand(0)) && + GEP.accumulateConstantOffset(*TD, Offset) && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - // Determine how much the GEP moves the pointer. - SmallVector<Value*, 8> Ops(GEP.idx_begin(), GEP.idx_end()); - int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops); - // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. - if (Offset == 0) { + if (!Offset) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa<AllocaInst>(BCI->getOperand(0)) || @@ -1337,7 +1356,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 8> NewIndices; Type *InTy = cast<PointerType>(BCI->getOperand(0)->getType())->getElementType(); - if (FindElementAtOffset(InTy, Offset, NewIndices)) { + if (FindElementAtOffset(InTy, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() ? Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : Builder->CreateGEP(BCI->getOperand(0), NewIndices); @@ -1471,6 +1490,62 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { return 0; } +/// \brief Move the call to free before a NULL test. +/// +/// Check if this free is accessed after its argument has been test +/// against NULL (property 0). +/// If yes, it is legal to move this call in its predecessor block. +/// +/// The move is performed only if the block containing the call to free +/// will be removed, i.e.: +/// 1. it has only one predecessor P, and P has two successors +/// 2. it contains the call and an unconditional branch +/// 3. its successor is the same as its predecessor's successor +/// +/// The profitability is out-of concern here and this function should +/// be called only if the caller knows this transformation would be +/// profitable (e.g., for code size). +static Instruction * +tryToMoveFreeBeforeNullTest(CallInst &FI) { + Value *Op = FI.getArgOperand(0); + BasicBlock *FreeInstrBB = FI.getParent(); + BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); + + // Validate part of constraint #1: Only one predecessor + // FIXME: We can extend the number of predecessor, but in that case, we + // would duplicate the call to free in each predecessor and it may + // not be profitable even for code size. + if (!PredBB) + return 0; + + // Validate constraint #2: Does this block contains only the call to + // free and an unconditional branch? + // FIXME: We could check if we can speculate everything in the + // predecessor block + if (FreeInstrBB->size() != 2) + return 0; + BasicBlock *SuccBB; + if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB))) + return 0; + + // Validate the rest of constraint #1 by matching on the pred branch. + TerminatorInst *TI = PredBB->getTerminator(); + BasicBlock *TrueBB, *FalseBB; + ICmpInst::Predicate Pred; + if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB))) + return 0; + if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE) + return 0; + + // Validate constraint #3: Ensure the null case just falls through. + if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB)) + return 0; + assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) && + "Broken CFG: missing edge from predecessor to successor"); + + FI.moveBefore(TI); + return &FI; +} Instruction *InstCombiner::visitFree(CallInst &FI) { @@ -1489,6 +1564,16 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { if (isa<ConstantPointerNull>(Op)) return EraseInstFromFunction(FI); + // If we optimize for code size, try to move the call to free before the null + // test so that simplify cfg can remove the empty block and dead code + // elimination the branch. I.e., helps to turn something like: + // if (foo) free(foo); + // into + // free(foo); + if (MinimizeSize) + if (Instruction *I = tryToMoveFreeBeforeNullTest(FI)) + return I; + return 0; } @@ -2374,7 +2459,7 @@ public: InstCombinerLibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI, InstCombiner *IC) - : LibCallSimplifier(TD, TLI) { + : LibCallSimplifier(TD, TLI, UnsafeFPShrink) { this->IC = IC; } @@ -2389,6 +2474,9 @@ public: bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<DataLayout>(); TLI = &getAnalysis<TargetLibraryInfo>(); + // Minimizing size? + MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. |