diff options
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineAddSub.cpp')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineAddSub.cpp | 948 |
1 files changed, 892 insertions, 56 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d8257e6..7595da0 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -13,16 +13,840 @@ #include "InstCombine.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; +namespace { + + /// Class representing coefficient of floating-point addend. + /// This class needs to be highly efficient, which is especially true for + /// the constructor. As of I write this comment, the cost of the default + /// constructor is merely 4-byte-store-zero (Assuming compiler is able to + /// perform write-merging). + /// + class FAddendCoef { + public: + // The constructor has to initialize a APFloat, which is uncessary for + // most addends which have coefficient either 1 or -1. So, the constructor + // is expensive. In order to avoid the cost of the constructor, we should + // reuse some instances whenever possible. The pre-created instances + // FAddCombine::Add[0-5] embodies this idea. + // + FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} + ~FAddendCoef(); + + void set(short C) { + assert(!insaneIntVal(C) && "Insane coefficient"); + IsFp = false; IntVal = C; + } + + void set(const APFloat& C); + + void negate(); + + bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } + Value *getValue(Type *) const; + + // If possible, don't define operator+/operator- etc because these + // operators inevitably call FAddendCoef's constructor which is not cheap. + void operator=(const FAddendCoef &A); + void operator+=(const FAddendCoef &A); + void operator-=(const FAddendCoef &A); + void operator*=(const FAddendCoef &S); + + bool isOne() const { return isInt() && IntVal == 1; } + bool isTwo() const { return isInt() && IntVal == 2; } + bool isMinusOne() const { return isInt() && IntVal == -1; } + bool isMinusTwo() const { return isInt() && IntVal == -2; } + + private: + bool insaneIntVal(int V) { return V > 4 || V < -4; } + APFloat *getFpValPtr(void) + { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } + const APFloat *getFpValPtr(void) const + { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } + + const APFloat &getFpVal(void) const { + assert(IsFp && BufHasFpVal && "Incorret state"); + return *getFpValPtr(); + } + + APFloat &getFpVal(void) + { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } + + bool isInt() const { return !IsFp; } + + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); + private: + + bool IsFp; + + // True iff FpValBuf contains an instance of APFloat. + bool BufHasFpVal; + + // The integer coefficient of an individual addend is either 1 or -1, + // and we try to simplify at most 4 addends from neighboring at most + // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt + // is overkill of this end. + short IntVal; + + AlignedCharArrayUnion<APFloat> FpValBuf; + }; + + /// FAddend is used to represent floating-point addend. An addend is + /// represented as <C, V>, where the V is a symbolic value, and C is a + /// constant coefficient. A constant addend is represented as <C, 0>. + /// + class FAddend { + public: + FAddend() { Val = 0; } + + Value *getSymVal (void) const { return Val; } + const FAddendCoef &getCoef(void) const { return Coeff; } + + bool isConstant() const { return Val == 0; } + bool isZero() const { return Coeff.isZero(); } + + void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } + void set(const APFloat& Coefficient, Value *V) + { Coeff.set(Coefficient); Val = V; } + void set(const ConstantFP* Coefficient, Value *V) + { Coeff.set(Coefficient->getValueAPF()); Val = V; } + + void negate() { Coeff.negate(); } + + /// Drill down the U-D chain one step to find the definition of V, and + /// try to break the definition into one or two addends. + static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); + + /// Similar to FAddend::drillDownOneStep() except that the value being + /// splitted is the addend itself. + unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; + + void operator+=(const FAddend &T) { + assert((Val == T.Val) && "Symbolic-values disagree"); + Coeff += T.Coeff; + } + + private: + void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } + + // This addend has the value of "Coeff * Val". + Value *Val; + FAddendCoef Coeff; + }; + + /// FAddCombine is the class for optimizing an unsafe fadd/fsub along + /// with its neighboring at most two instructions. + /// + class FAddCombine { + public: + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + Value *simplify(Instruction *FAdd); + + private: + typedef SmallVector<const FAddend*, 4> AddendVect; + + Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); + + Value *performFactorization(Instruction *I); + + /// Convert given addend to a Value + Value *createAddendVal(const FAddend &A, bool& NeedNeg); + + /// Return the number of instructions needed to emit the N-ary addition. + unsigned calcInstrNumber(const AddendVect& Vect); + Value *createFSub(Value *Opnd0, Value *Opnd1); + Value *createFAdd(Value *Opnd0, Value *Opnd1); + Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); + Value *createFNeg(Value *V); + Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); + void createInstPostProc(Instruction *NewInst); + + InstCombiner::BuilderTy *Builder; + Instruction *Instr; + + private: + // Debugging stuff are clustered here. + #ifndef NDEBUG + unsigned CreateInstrNum; + void initCreateInstNum() { CreateInstrNum = 0; } + void incCreateInstNum() { CreateInstrNum++; } + #else + void initCreateInstNum() {} + void incCreateInstNum() {} + #endif + }; +} + +//===----------------------------------------------------------------------===// +// +// Implementation of +// {FAddendCoef, FAddend, FAddition, FAddCombine}. +// +//===----------------------------------------------------------------------===// +FAddendCoef::~FAddendCoef() { + if (BufHasFpVal) + getFpValPtr()->~APFloat(); +} + +void FAddendCoef::set(const APFloat& C) { + APFloat *P = getFpValPtr(); + + if (isInt()) { + // As the buffer is meanless byte stream, we cannot call + // APFloat::operator=(). + new(P) APFloat(C); + } else + *P = C; + + IsFp = BufHasFpVal = true; +} + +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; +} + +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { + if (That.isInt()) + set(That.IntVal); + else + set(That.getFpVal()); +} + +void FAddendCoef::operator+=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal += That.IntVal; + else + getFpVal().add(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); +} + +void FAddendCoef::operator-=(const FAddendCoef &That) { + enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; + if (isInt() == That.isInt()) { + if (isInt()) + IntVal -= That.IntVal; + else + getFpVal().subtract(That.getFpVal(), RndMode); + return; + } + + if (isInt()) { + const APFloat &T = That.getFpVal(); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); + return; + } + + APFloat &T = getFpVal(); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); +} + +void FAddendCoef::operator*=(const FAddendCoef &That) { + if (That.isOne()) + return; + + if (That.isMinusOne()) { + negate(); + return; + } + + if (isInt() && That.isInt()) { + int Res = IntVal * (int)That.IntVal; + assert(!insaneIntVal(Res) && "Insane int value"); + IntVal = Res; + return; + } + + const fltSemantics &Semantic = + isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); + + if (isInt()) + convertToFpType(Semantic); + APFloat &F0 = getFpVal(); + + if (That.isInt()) + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); + else + F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); + + return; +} + +void FAddendCoef::negate() { + if (isInt()) + IntVal = 0 - IntVal; + else + getFpVal().changeSign(); +} + +Value *FAddendCoef::getValue(Type *Ty) const { + return isInt() ? + ConstantFP::get(Ty, float(IntVal)) : + ConstantFP::get(Ty->getContext(), getFpVal()); +} + +// The definition of <Val> Addends +// ========================================= +// A + B <1, A>, <1,B> +// A - B <1, A>, <1,B> +// 0 - B <-1, B> +// C * A, <C, A> +// A + C <1, A> <C, NULL> +// 0 +/- 0 <0, NULL> (corner case) +// +// Legend: A and B are not constant, C is constant +// +unsigned FAddend::drillValueDownOneStep + (Value *Val, FAddend &Addend0, FAddend &Addend1) { + Instruction *I = 0; + if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) + return 0; + + unsigned Opcode = I->getOpcode(); + + if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { + ConstantFP *C0, *C1; + Value *Opnd0 = I->getOperand(0); + Value *Opnd1 = I->getOperand(1); + if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) + Opnd0 = 0; + + if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) + Opnd1 = 0; + + if (Opnd0) { + if (!C0) + Addend0.set(1, Opnd0); + else + Addend0.set(C0, 0); + } + + if (Opnd1) { + FAddend &Addend = Opnd0 ? Addend1 : Addend0; + if (!C1) + Addend.set(1, Opnd1); + else + Addend.set(C1, 0); + if (Opcode == Instruction::FSub) + Addend.negate(); + } + + if (Opnd0 || Opnd1) + return Opnd0 && Opnd1 ? 2 : 1; + + // Both operands are zero. Weird! + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + return 1; + } + + if (I->getOpcode() == Instruction::FMul) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { + Addend0.set(C, V1); + return 1; + } + + if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { + Addend0.set(C, V0); + return 1; + } + } + + return 0; +} + +// Try to break *this* addend into two addends. e.g. Suppose this addend is +// <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, +// i.e. <2.3, X> and <2.3, Y>. +// +unsigned FAddend::drillAddendDownOneStep + (FAddend &Addend0, FAddend &Addend1) const { + if (isConstant()) + return 0; + + unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); + if (!BreakNum || Coeff.isOne()) + return BreakNum; + + Addend0.Scale(Coeff); + + if (BreakNum == 2) + Addend1.Scale(Coeff); + + return BreakNum; +} + +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + +Value *FAddCombine::simplify(Instruction *I) { + assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); + + // Currently we are not able to handle vector type. + if (I->getType()->isVectorTy()) + return 0; + + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + // Save the instruction before calling other member-functions. + Instr = I; + + FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; + + unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); + + // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. + unsigned Opnd0_ExpNum = 0; + unsigned Opnd1_ExpNum = 0; + + if (!Opnd0.isConstant()) + Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); + + // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. + if (OpndNum == 2 && !Opnd1.isConstant()) + Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); + + // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 + if (Opnd0_ExpNum && Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0_0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + // Compute instruction quota. We should save at least one instruction. + unsigned InstQuota = 0; + + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && + (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; + + if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) + return R; + } + + if (OpndNum != 2) { + // The input instruction is : "I=0.0 +/- V". If the "V" were able to be + // splitted into two addends, say "V = X - Y", the instruction would have + // been optimized into "I = Y - X" in the previous steps. + // + const FAddendCoef &CE = Opnd0.getCoef(); + return CE.isOne() ? Opnd0.getSymVal() : 0; + } + + // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] + if (Opnd1_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd0); + AllOpnds.push_back(&Opnd1_0); + if (Opnd1_ExpNum == 2) + AllOpnds.push_back(&Opnd1_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] + if (Opnd0_ExpNum) { + AddendVect AllOpnds; + AllOpnds.push_back(&Opnd1); + AllOpnds.push_back(&Opnd0_0); + if (Opnd0_ExpNum == 2) + AllOpnds.push_back(&Opnd0_1); + + if (Value *R = simplifyFAdd(AllOpnds, 1)) + return R; + } + + // step 6: Try factorization as the last resort, + return performFactorization(I); +} + +Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { + + unsigned AddendNum = Addends.size(); + assert(AddendNum <= 4 && "Too many addends"); + + // For saving intermediate results; + unsigned NextTmpIdx = 0; + FAddend TmpResult[3]; + + // Points to the constant addend of the resulting simplified expression. + // If the resulting expr has constant-addend, this constant-addend is + // desirable to reside at the top of the resulting expression tree. Placing + // constant close to supper-expr(s) will potentially reveal some optimization + // opportunities in super-expr(s). + // + const FAddend *ConstAdd = 0; + + // Simplified addends are placed <SimpVect>. + AddendVect SimpVect; + + // The outer loop works on one symbolic-value at a time. Suppose the input + // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... + // The symbolic-values will be processed in this order: x, y, z. + // + for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { + + const FAddend *ThisAddend = Addends[SymIdx]; + if (!ThisAddend) { + // This addend was processed before. + continue; + } + + Value *Val = ThisAddend->getSymVal(); + unsigned StartIdx = SimpVect.size(); + SimpVect.push_back(ThisAddend); + + // The inner loop collects addends sharing same symbolic-value, and these + // addends will be later on folded into a single addend. Following above + // example, if the symbolic value "y" is being processed, the inner loop + // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will + // be later on folded into "<b1+b2, y>". + // + for (unsigned SameSymIdx = SymIdx + 1; + SameSymIdx < AddendNum; SameSymIdx++) { + const FAddend *T = Addends[SameSymIdx]; + if (T && T->getSymVal() == Val) { + // Set null such that next iteration of the outer loop will not process + // this addend again. + Addends[SameSymIdx] = 0; + SimpVect.push_back(T); + } + } + + // If multiple addends share same symbolic value, fold them together. + if (StartIdx + 1 != SimpVect.size()) { + FAddend &R = TmpResult[NextTmpIdx ++]; + R = *SimpVect[StartIdx]; + for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) + R += *SimpVect[Idx]; + + // Pop all addends being folded and push the resulting folded addend. + SimpVect.resize(StartIdx); + if (Val != 0) { + if (!R.isZero()) { + SimpVect.push_back(&R); + } + } else { + // Don't push constant addend at this time. It will be the last element + // of <SimpVect>. + ConstAdd = &R; + } + } + } + + assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && + "out-of-bound access"); + + if (ConstAdd) + SimpVect.push_back(ConstAdd); + + Value *Result; + if (!SimpVect.empty()) + Result = createNaryFAdd(SimpVect, InstrQuota); + else { + // The addition is folded to 0.0. + Result = ConstantFP::get(Instr->getType(), 0.0); + } + + return Result; +} + +Value *FAddCombine::createNaryFAdd + (const AddendVect &Opnds, unsigned InstrQuota) { + assert(!Opnds.empty() && "Expect at least one addend"); + + // Step 1: Check if the # of instructions needed exceeds the quota. + // + unsigned InstrNeeded = calcInstrNumber(Opnds); + if (InstrNeeded > InstrQuota) + return 0; + + initCreateInstNum(); + + // step 2: Emit the N-ary addition. + // Note that at most three instructions are involved in Fadd-InstCombine: the + // addition in question, and at most two neighboring instructions. + // The resulting optimized addition should have at least one less instruction + // than the original addition expression tree. This implies that the resulting + // N-ary addition has at most two instructions, and we don't need to worry + // about tree-height when constructing the N-ary addition. + + Value *LastVal = 0; + bool LastValNeedNeg = false; + + // Iterate the addends, creating fadd/fsub using adjacent two addends. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + bool NeedNeg; + Value *V = createAddendVal(**I, NeedNeg); + if (!LastVal) { + LastVal = V; + LastValNeedNeg = NeedNeg; + continue; + } + + if (LastValNeedNeg == NeedNeg) { + LastVal = createFAdd(LastVal, V); + continue; + } + + if (LastValNeedNeg) + LastVal = createFSub(V, LastVal); + else + LastVal = createFSub(LastVal, V); + + LastValNeedNeg = false; + } + + if (LastValNeedNeg) { + LastVal = createFNeg(LastVal); + } + + #ifndef NDEBUG + assert(CreateInstrNum == InstrNeeded && + "Inconsistent in instruction numbers"); + #endif + + return LastVal; +} + +Value *FAddCombine::createFSub + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFSub(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFNeg(Value *V) { + Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); + return createFSub(Zero, V); +} + +Value *FAddCombine::createFAdd + (Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFMul(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +void FAddCombine::createInstPostProc(Instruction *NewInstr) { + NewInstr->setDebugLoc(Instr->getDebugLoc()); + + // Keep track of the number of instruction created. + incCreateInstNum(); + + // Propagate fast-math flags + NewInstr->setFastMathFlags(Instr->getFastMathFlags()); +} + +// Return the number of instruction needed to emit the N-ary addition. +// NOTE: Keep this function in sync with createAddendVal(). +unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { + unsigned OpndNum = Opnds.size(); + unsigned InstrNeeded = OpndNum - 1; + + // The number of addends in the form of "(-1)*x". + unsigned NegOpndNum = 0; + + // Adjust the number of instructions needed to emit the N-ary add. + for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); + I != E; I++) { + const FAddend *Opnd = *I; + if (Opnd->isConstant()) + continue; + + const FAddendCoef &CE = Opnd->getCoef(); + if (CE.isMinusOne() || CE.isMinusTwo()) + NegOpndNum++; + + // Let the addend be "c * x". If "c == +/-1", the value of the addend + // is immediately available; otherwise, it needs exactly one instruction + // to evaluate the value. + if (!CE.isMinusOne() && !CE.isOne()) + InstrNeeded++; + } + if (NegOpndNum == OpndNum) + InstrNeeded++; + return InstrNeeded; +} + +// Input Addend Value NeedNeg(output) +// ================================================================ +// Constant C C false +// <+/-1, V> V coefficient is -1 +// <2/-2, V> "fadd V, V" coefficient is -2 +// <C, V> "fmul V, C" false +// +// NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. +Value *FAddCombine::createAddendVal + (const FAddend &Opnd, bool &NeedNeg) { + const FAddendCoef &Coeff = Opnd.getCoef(); + + if (Opnd.isConstant()) { + NeedNeg = false; + return Coeff.getValue(Instr->getType()); + } + + Value *OpndVal = Opnd.getSymVal(); + + if (Coeff.isMinusOne() || Coeff.isOne()) { + NeedNeg = Coeff.isMinusOne(); + return OpndVal; + } + + if (Coeff.isTwo() || Coeff.isMinusTwo()) { + NeedNeg = Coeff.isMinusTwo(); + return createFAdd(OpndVal, OpndVal); + } + + NeedNeg = false; + return createFMul(OpndVal, Coeff.getValue(Instr->getType())); +} + /// AddOne - Add one to a ConstantInt. static Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } + /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { return ConstantInt::get(C->getContext(), C->getValue()-1); @@ -37,10 +861,10 @@ static Constant *SubOne(ConstantInt *C) { static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; - + Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) return I->getOperand(0); @@ -64,22 +888,22 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. - - // Add has the property that adding any two 2's complement numbers can only + + // Add has the property that adding any two 2's complement numbers can only // have one carry bit which can change a sign. As such, if LHS and RHS each // have at least two sign bits, we know that the addition of the two values // will sign extend fine. if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; - - + + // If one of the operands only has one non-zero bit, and if the other operand // has a known-zero bit in a more significant place than it (not including the // sign bit) the ripple may go up to and fill the zero, but won't change the // sign. For example, (X & ~4) + 1. - + // TODO: Implement. - + return false; } @@ -100,7 +924,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const APInt &Val = CI->getValue(); if (Val.isSignBit()) return BinaryOperator::CreateXor(LHS, RHS); - + // See if SimplifyDemandedBits can simplify this. This handles stuff like // (X & 254)+1 -> (X&254)|1 if (SimplifyDemandedInstructionBits(I)) @@ -110,7 +934,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - + Value *XorLHS = 0; ConstantInt *XorRHS = 0; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); @@ -124,13 +948,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { else if (XorRHS->getValue().isPowerOf2()) ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; } - + if (ExtendAmt) { APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); if (!MaskedValueIsZero(XorLHS, Mask)) ExtendAmt = 0; } - + if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); @@ -175,7 +999,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } - + return BinaryOperator::CreateSub(RHS, LHSV); } @@ -209,7 +1033,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); - + // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateOr(LHS, RHS); @@ -251,7 +1075,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. const APInt &AddRHSV = CRHS->getValue(); - + // Form a mask of all bits from the lowest bit added through the top. APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); @@ -289,7 +1113,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - + if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. return SelectInst::Create(SI->getCondition(), A, N); @@ -301,18 +1125,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { // (add (sext x), cst) --> (sext (add x, cst')) if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSExt(CI, I.getType()) == RHSC && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } - + // (add (sext x), (sext y)) --> (sext (add int x, y)) if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -323,7 +1147,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -351,18 +1175,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Constant *RHSC = dyn_cast<Constant>(RHS)) { - // X + 0 --> X - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->isExactlyValue(ConstantFP::getNegativeZero - (I.getType())->getValueAPF())) - return ReplaceInstUsesWith(I, LHS); - } + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); - if (isa<PHINode>(LHS)) - if (Instruction *NV = FoldOpIntoPhi(I)) - return NV; - } + if (isa<Constant>(RHS) && isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; // -A + B --> B - A // -A + -B --> -(A + B) @@ -374,11 +1192,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = dyn_castFNegVal(RHS)) return BinaryOperator::CreateFSub(LHS, V); - // Check for X+0.0. Simplify it to X if we know X is not -0.0. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) - if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) - return ReplaceInstUsesWith(I, LHS); - // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { @@ -388,7 +1201,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // requires a constant pool load, and generally allows the add to be better // instcombined. if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { - Constant *CI = + Constant *CI = ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && ConstantExpr::getSIToFP(CI, I.getType()) == CFP && @@ -399,7 +1212,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return new SIToFPInst(NewAdd, I.getType()); } } - + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a @@ -410,13 +1223,18 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0),"addconv"); return new SIToFPInst(NewAdd, I.getType()); } } } - + + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + return Changed ? &I : 0; } @@ -428,7 +1246,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty) { assert(TD && "Must have target data info for this"); - + // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; @@ -451,7 +1269,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { // X - (gep X, ...) if (RHSGEP->getOperand(0) == LHS) { @@ -467,16 +1285,16 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, } } } - + // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. if (GEP1 == 0 || (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) return 0; - + // Emit the offset of the GEP and an intptr_t. Value *Result = EmitGEPOffset(GEP1); - + // If we had a constant expression GEP on the other side offsetting the // pointer, subtract it from the offset we have. if (GEP2) { @@ -517,7 +1335,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Replace (-1 - A) with (~A). if (match(Op0, m_AllOnes())) return BinaryOperator::CreateNot(Op1); - + if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { // C - ~X == X + (1+C) Value *X = 0; @@ -551,20 +1369,30 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + + // Fold (sub 0, (zext bool to B)) --> (sext bool to B) + if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateSExtOrBitCast(X, Op1->getType()); + + // Fold (sub 0, (sext bool to B)) --> (zext bool to B) + if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) + if (X->getType()->isIntegerTy(1)) + return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } - + { Value *Y; // X-(X+Y) == -Y X-(Y+X) == -Y if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateNeg(Y); - + // (X-Y)-X == -Y if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) return BinaryOperator::CreateNeg(Y); } - + if (Op1->hasOneUse()) { Value *X = 0, *Y = 0, *Z = 0; Constant *C = 0; @@ -581,7 +1409,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(Y, Y->getName() + ".not")); - + // 0 - (X sdiv C) -> (X sdiv -C) if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero())) @@ -604,14 +1432,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); return BinaryOperator::CreateMul(Op0, C); } - + // X - A*-B -> X + A*B // X - -A*B -> X + A*B Value *A, *B; if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); - + // X - A*CI -> X + A*-CI // X - CI*A -> X + A*-CI if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || @@ -630,7 +1458,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (X == dyn_castFoldableMul(Op1, C2)) return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } - + // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (TD) { @@ -639,23 +1467,31 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_PtrToInt(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); - + // trunc(p)-trunc(q) -> trunc(p-q) if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); } - + return 0; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) + return ReplaceInstUsesWith(I, V); + // If this is a 'B = x-(-A)', change to B = x+A... if (Value *V = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFAdd(Op0, V); + if (I.hasUnsafeAlgebra()) { + if (Value *V = FAddCombine(Builder).simplify(&I)) + return ReplaceInstUsesWith(I, V); + } + return 0; } |